List of usage examples for org.apache.commons.cli2.builder DefaultOptionBuilder DefaultOptionBuilder
public DefaultOptionBuilder()
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelClassifierDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // example args: // -if /user/maximzhao/dataset/rcv1_test.binary -of // /user/maximzhao/rcv.result // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt // 1080/* w w w . j av a 2 s . c o m*/ log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option testFileOpt = obuilder.withLongName("testFile").withRequired(true) .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true) .withArgument(abuilder.withName("modelFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create(); Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option 
numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt) .withOption(mapSplitSizeOpt).withOption(hdfsServerOpt).withOption(outputFileOpt) .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt) .withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTestFile(cmdLine.getValue(testFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if (cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { 
log.error("Exception", e); CommandLineUtil.printHelp(group); } // set parameters for the mapper, combiner, reducer // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelClassifierJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelClassifierJob.setMapperParameters(job.getConfiguration(), para.getHdfsServerAddr(), para.getModelFileName()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // args = new String [] {"-if","infile","-of","outfile","m", // "-nm","10","--nr","11"}; log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option trainFileOpt = obuilder.withLongName("trainFile").withRequired(true) .withArgument(abuilder.withName("trainFile").withMinimum(1).withMaximum(1).create()) .withDescription("Training data set file").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option lambdaOpt = obuilder.withLongName("lambda").withRequired(false) .withArgument(abuilder.withName("lambda").withMinimum(1).withMaximum(1).create()) .withDescription("Regularization parameter (default = 0.01) ").withShortName("l").create(); Option iterOpt = obuilder.withLongName("iter").withRequired(false) .withArgument(abuilder.withName("iter").withMinimum(1).withMaximum(1).create()) .withDescription("Number of iterations (default = 10/lambda) ").withShortName("i").create(); Option kOpt = obuilder.withLongName("k").withRequired(false) .withArgument(abuilder.withName("k").withMinimum(1).withMaximum(1).create()) .withDescription("Size of block for stochastic gradient (default = 1)").withShortName("v").create(); Option sampleNumOpt = obuilder.withLongName("trainSampleNum").withRequired(false) .withArgument(abuilder.withName("trainSampleNum").withMinimum(1).withMaximum(1).create()) .withDescription(//from w ww . j a va 2 s . 
co m "Number of Samples in traindata set, for large-scale dataset optimization (default = 0) ") .withShortName("tsn").create(); Option classNumOpt = obuilder.withLongName("classNum").withRequired(true) .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create()) .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c") .create(); Option startingClassIndexOpt = obuilder.withLongName("startingClassIndex").withRequired(false) .withArgument(abuilder.withName("startingClassIndex").withMinimum(1).withMaximum(1).create()) .withDescription("The starting index of class (default = 0) or 1").withShortName("sci").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false) .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create()) .withDescription("0 -> Binary Classfication, 1 -> Regression, " + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ") .withShortName("s").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create(); // hadoop system setting. 
Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map Split size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(trainFileOpt).withOption(outputFileOpt) .withOption(lambdaOpt).withOption(iterOpt).withOption(kOpt).withOption(svmTypeOpt) .withOption(classNumOpt).withOption(hdfsServerOpt).withOption(modelFileOpt) .withOption(startingClassIndexOpt).withOption(sampleNumOpt).withOption(mapSplitSizeOpt) .withOption(maxHeapSizeOpt).withOption(taskTimeoutOpt).withOption(numberofReducersOpt) .withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTrainFile(cmdLine.getValue(trainFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); // lambda if (cmdLine.hasOption(lambdaOpt)) { 
para.setLambda(Double.parseDouble(cmdLine.getValue(lambdaOpt).toString())); } // iteration if (cmdLine.hasOption(iterOpt)) { para.setMaxIter(Integer.parseInt(cmdLine.getValue(iterOpt).toString())); } // k if (cmdLine.hasOption(kOpt)) { para.setExamplesPerIter(Integer.parseInt(cmdLine.getValue(kOpt).toString())); } // class number para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString())); // number of samples in training data set. if (cmdLine.hasOption(sampleNumOpt)) { para.setTrainSampleNumber(Integer.parseInt(cmdLine.getValue(sampleNumOpt).toString())); } if (cmdLine.hasOption(startingClassIndexOpt)) { para.setStartingClassIndex(Integer.parseInt(cmdLine.getValue(startingClassIndexOpt).toString())); } // models' path para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } // multi classification classificationType if (cmdLine.hasOption(svmTypeOpt)) { para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString())); } // MapReduce system setting. 
if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); } if (cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } // set parameters for the mapper, combiner, reducer // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelMultiClassifierTrainJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelMultiClassifierTrainJob.setMapperParameters(job.getConfiguration(), para.getMaxIter(), para.getTrainSampleNumber(), para.getClassNum(), para.getClassificationType(), para.getStartingClassIndex()); ParallelMultiClassifierTrainJob.setReducerParameters(job.getConfiguration(), (float) para.getLambda(), para.getExamplesPerIter(), para.getModelFileName(), para.getHdfsServerAddr()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTrainFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassPredictionDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, OptionException { // example args: // -if /user/maximzhao/dataset/rcv1_test.binary -of // /user/maximzhao/rcv.result // -m /user/maximzhao/rcv1.model -nor 1 -ms 241572968 -mhs -Xmx500M -ttt // 1080//from ww w . ja v a 2 s .c om log.info("[job] " + JOB_NAME); DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option testFileOpt = obuilder.withLongName("testFile").withRequired(true) .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of test data file (default = noTestFile)").withShortName("if").create(); Option outputFileOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Out put file name: ").withShortName("of").create(); Option modelFileOpt = obuilder.withLongName("modelFilePath").withRequired(true) .withArgument(abuilder.withName("modelFilePath").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model files Path (default = /user) ").withShortName("m").create(); Option classNumOpt = obuilder.withLongName("classNum").withRequired(true) .withArgument(abuilder.withName("classNum").withMinimum(1).withMaximum(1).create()) .withDescription("The number of classes (Categories in multi-classification) ").withShortName("c") .create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); // system setup Option mapSplitSizeOpt = obuilder.withLongName("mapSplitSize").withRequired(false) .withArgument(abuilder.withName("mapSplitSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max map 
Split size ").withShortName("ms").create(); Option maxHeapSizeOpt = obuilder.withLongName("maxHeapSize").withRequired(false) .withArgument(abuilder.withName("maxHeapSize").withMinimum(1).withMaximum(1).create()) .withDescription("Max Heap Size: ").withShortName("mhs").create(); Option numberofReducersOpt = obuilder.withLongName("numberofReducers").withRequired(false) .withArgument(abuilder.withName("numberofReducers").withMinimum(1).withMaximum(1).create()) .withDescription("Number of Reducers: (defaults = 0)").withShortName("nor").create(); Option taskTimeoutOpt = obuilder.withLongName("taskTimeout").withRequired(false) .withArgument(abuilder.withName("taskTimeout").withMinimum(1).withMaximum(1).create()) .withDescription("Task Time out ( Minutes ) : ").withShortName("ttt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(testFileOpt) .withOption(mapSplitSizeOpt).withOption(classNumOpt).withOption(outputFileOpt) .withOption(maxHeapSizeOpt).withOption(hdfsServerOpt).withOption(taskTimeoutOpt) .withOption(numberofReducersOpt).withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTestFile(cmdLine.getValue(testFileOpt).toString()); para.setOutFile(cmdLine.getValue(outputFileOpt).toString()); // models' path para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // class number para.setClassNum(Integer.parseInt(cmdLine.getValue(classNumOpt).toString())); // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } if (cmdLine.hasOption(mapSplitSizeOpt)) { para.setMapSplitSize(Long.parseLong(cmdLine.getValue(mapSplitSizeOpt).toString())); 
} if (cmdLine.hasOption(numberofReducersOpt)) { para.setNumberReducers(Integer.parseInt(cmdLine.getValue(numberofReducersOpt).toString())); } if (cmdLine.hasOption(maxHeapSizeOpt)) { para.setMaxHeapSize(cmdLine.getValue(maxHeapSizeOpt).toString()); } if (cmdLine.hasOption(taskTimeoutOpt)) { para.setTaskTimeout(Long.parseLong(cmdLine.getValue(taskTimeoutOpt).toString())); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } // creat a job Job job = new Job(new Configuration()); // step 1.1 set job static parameters ParallelMultiClassPredictionJob.setJobParameters(job); // step 1.2 set mapper parameters ParallelMultiClassPredictionJob.setMapperParameters(job.getConfiguration(), para.getModelFileName(), para.getHdfsServerAddr(), para.getClassNum(), para.getClassificationType()); // set general parameters related to a job MapReduceUtil.setJobParameters(job, para.getTestFile(), para.getOutFile(), para.getMapSplitSize(), para.getNumberReducers(), para.getMaxHeapSize(), para.getTaskTimeout()); // submit a job log.info("job completed: " + MapReduceUtil.submitJob(job)); }
From source file:org.apache.mahout.classifier.svm.algorithm.sequentialalgorithms.SVMSequentialPrediction.java
public static void main(String[] args) throws IOException, OptionException { if (args.length < 1) { args = new String[] { "-te", "../examples/src/test/resources/svmdataset/test.dat", "-m", "../examples/src/test/resources/svmdataset/SVM.model" }; // args = new String[] { // "-te", // "/media/Data/MachineLearningDataset/triazines_scale.t", // "-m", "/home/maximzhao/SVMregression.model", "-s", // "1"};/*from www . ja v a 2 s . com*/ // args = new String[] { // "-te", // "/media/Data/MachineLearningDataset/rcv1_train.binary", // "-m", "/home/maximzhao/SVMrcv1.model"}; // args = new String[] {"-te", // "/media/Data/MachineLearningDataset/protein.t", // "-m", "/home/maximzhao/sectormulti/SVMprotein.model", // "-s", "2"}; // args = new String[] {"-te", // "/media/Data/MachineLearningDataset/poker.t", // "-m", "/home/maximzhao/sectormulti/SVMpoker.model", // "-s", "3"}; // args = new String[] {"-te", "/media/Data/MachineLearningDataset/poker", // "-m", "/user/maximzhao/pokerpro", "-s", "3", // "-hdfs", "hdfs://localhost:12009"}; } DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option testFileOpt = obuilder.withLongName("testFile").withRequired(true) .withArgument(abuilder.withName("testFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of test data file (default = noTestFile)").withShortName("te").create(); Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false) .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create()) .withDescription("0 -> Binary Classfication, 1 -> Regression, " + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ") .withShortName("s").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(true) .withArgument(abuilder.withName("modelFile").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = 
noModelFile) ").withShortName("m").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option predictedFileOpt = obuilder.withLongName("predictedFile").withRequired(false) .withArgument(abuilder.withName("predictedFile").withMinimum(1).withMaximum(1).create()) .withDescription("File to store predicted label(default = testFile.predict) ").withShortName("p") .create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(modelFileOpt).withOption(predictedFileOpt) .withOption(testFileOpt).withOption(svmTypeOpt).withOption(helpOpt).withOption(hdfsServerOpt) .create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTestFile(cmdLine.getValue(testFileOpt).toString()); para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); // svm classificationType if (cmdLine.hasOption(svmTypeOpt)) { para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString())); } else { para.setClassificationType(0); // default classfication } if (cmdLine.hasOption(predictedFileOpt)) { para.setOutFile(cmdLine.getValue(predictedFileOpt).toString()); } else { para.setOutFile(para.getTestFile() + ".predict"); } // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } else { para.setHdfsServerAddr(null); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } // load test data set DataSetHandler test = new DataSetHandler(para.getTestFile()); Prediction 
predictor = PredictionFactory.getInstance(para.getClassificationType()); predictor.prediction(test, para); para.report(para.getClassificationType()); log.info("Done!"); }
From source file:org.apache.mahout.classifier.svm.algorithm.sequentialalgorithms.SVMSequentialTraining.java
public static void main(String[] args) throws IOException, OptionException { if (args.length < 1) { args = new String[] { "-tr", "../examples/src/test/resources/svmdataset/train.dat", "-m", "../examples/src/test/resources/svmdataset/SVM.model" }; // args = new String[] { // "-tr", // "/media/Data/MachineLearningDataset/triazines_scale", // "-m", "/home/maximzhao/SVMregression.model", "-s", // "1"}; // // for rcv1 // args = new String[] { // "-tr", // "/media/Data/MachineLearningDataset/rcv1_test.binary", // "-m", "/home/maximzhao/SVMrcv1.model", "-ts", // "677399"}; // args = new String[] {"-tr", "/media/Data/MachineLearningDataset/protein", // "-m", "/home/maximzhao/sectormulti/SVMprotein.model", // "-s", "2"}; // args = new String[] {"-tr", "/media/Data/MachineLearningDataset/poker", // "-m", "/home/maximzhao/sectormulti/SVMpoker.model", // "-s", "3"}; // args = new String[] {"-tr", "/user/maximzhao/dataset/train.dat", "-hdfs", // "hdfs://localhost:12009", "-m", // "../examples/src/test/resources/svmdataset/SVM.model"}; }//from w w w.ja v a 2 s . 
c om DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option trainFileOpt = obuilder.withLongName("trainFile").withRequired(true) .withArgument(abuilder.withName("trainFile").withMinimum(1).withMaximum(1).create()) .withDescription("Training data set file").withShortName("tr").create(); Option modelFileOpt = obuilder.withLongName("modelFile").withRequired(false) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("Name of model file (default = noModelFile) ").withShortName("m").create(); Option svmTypeOpt = obuilder.withLongName("svmType").withRequired(false) .withArgument(abuilder.withName("svmType").withMinimum(1).withMaximum(1).create()) .withDescription("0 -> Binary Classfication, 1 -> Regression, " + "2 -> Multi-Classification (one-vs.-one), 3 -> Multi-Classification (one-vs.-others) ") .withShortName("s").create(); Option epsilonOpt = obuilder.withLongName("epsilon").withRequired(false) .withArgument(abuilder.withName("epsilon").withMinimum(1).withMaximum(1).create()) .withDescription("epsilon for regression (default = 0.1) ").withShortName("e").create(); Option lambdaOpt = obuilder.withLongName("lambda").withRequired(false) .withArgument(abuilder.withName("lambda").withMinimum(1).withMaximum(1).create()) .withDescription("Regularization parameter (default = 0.01) ").withShortName("l").create(); Option iterOpt = obuilder.withLongName("iter").withRequired(false) .withArgument(abuilder.withName("iter").withMinimum(1).withMaximum(1).create()) .withDescription("Number of iterations (default = 10/lambda) ").withShortName("i").create(); Option validateExampleNumberOpt = obuilder.withLongName("validateExampleNumber").withRequired(false) .withArgument(abuilder.withName("validateExampleNumber").withMinimum(1).withMaximum(1).create()) .withDescription("Number of validate Examples (default = Maximum iteration / 10) ") 
.withShortName("ven").create(); Option kOpt = obuilder.withLongName("k").withRequired(false) .withArgument(abuilder.withName("k").withMinimum(1).withMaximum(1).create()) .withDescription("Size of block for stochastic gradient (default = 1)").withShortName("v").create(); Option sampleNumOpt = obuilder.withLongName("trainSampleNum").withRequired(false) .withArgument(abuilder.withName("trainSampleNum").withMinimum(1).withMaximum(1).create()) .withDescription( "Number of Samples in traindata set, for large-scale dataset optimization (default = 0) ") .withShortName("ts").create(); Option hdfsServerOpt = obuilder.withLongName("HDFSServer").withRequired(false) .withArgument(abuilder.withName("HDFSServer").withMinimum(1).withMaximum(1).create()) .withDescription("HDFS Server's Address (default = null) ").withShortName("hdfs").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(trainFileOpt).withOption(validateExampleNumberOpt) .withOption(modelFileOpt).withOption(svmTypeOpt).withOption(lambdaOpt).withOption(hdfsServerOpt) .withOption(iterOpt).withOption(epsilonOpt).withOption(kOpt).withOption(sampleNumOpt) .withOption(helpOpt).create(); SVMParameters para = new SVMParameters(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } para.setTrainFile(cmdLine.getValue(trainFileOpt).toString()); // svm classificationType if (cmdLine.hasOption(svmTypeOpt)) { para.setClassificationType(Integer.parseInt(cmdLine.getValue(svmTypeOpt).toString())); } // epsilon if (cmdLine.hasOption(epsilonOpt)) { para.setEpsilon(Double.parseDouble(cmdLine.getValue(epsilonOpt).toString())); } // lambda if (cmdLine.hasOption(lambdaOpt)) { para.setLambda(Double.parseDouble(cmdLine.getValue(lambdaOpt).toString())); } // iteration if 
(cmdLine.hasOption(iterOpt)) { para.setMaxIter(Integer.parseInt(cmdLine.getValue(iterOpt).toString())); } // iteration if (cmdLine.hasOption(validateExampleNumberOpt)) { para.setValidateExampleNumber( Integer.parseInt(cmdLine.getValue(validateExampleNumberOpt).toString())); } else { para.setValidateExampleNumber(para.getMaxIter() / 10); } // k if (cmdLine.hasOption(kOpt)) { para.setExamplesPerIter(Integer.parseInt(cmdLine.getValue(kOpt).toString())); } if (cmdLine.hasOption(modelFileOpt)) { para.setModelFileName(cmdLine.getValue(modelFileOpt).toString()); } else { para.setModelFileName("SVM.model"); } // number of samples in training data set. if (cmdLine.hasOption(sampleNumOpt)) { para.setTrainSampleNumber(Integer.parseInt(cmdLine.getValue(sampleNumOpt).toString())); } // hdfs server address if (cmdLine.hasOption(hdfsServerOpt)) { para.setHdfsServerAddr(cmdLine.getValue(hdfsServerOpt).toString()); } else { para.setHdfsServerAddr(null); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } DataSetHandler train = new DataSetHandler(para.getTrainFile()); // Get data set train.getData(para); Training classifier = TrainingFactory.getInstance(para.getClassificationType()); classifier.training(train, para); para.report(para.getClassificationType()); log.info("All Processes are Finished!!"); }
From source file:org.apache.mahout.clustering.canopy.CanopyClusteringJob.java
/** * @param args//from ww w . ja v a 2s.c o m */ public static void main(String[] args) throws IOException, ClassNotFoundException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The Path for input Vectors. Must be a SequenceFile of Writable, Vector") .withShortName("i").create(); Option outputOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The Path to put the output in").withShortName("o").create(); Option measureClassOpt = obuilder.withLongName("distance").withRequired(false) .withArgument(abuilder.withName("distance").withMinimum(1).withMaximum(1).create()) .withDescription("The Distance Measure to use. Default is SquaredEuclidean").withShortName("m") .create(); Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false) .withArgument(abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create()) .withDescription("The Vector implementation class name. 
Default is SparseVector.class") .withShortName("v").create(); Option t1Opt = obuilder.withLongName("t1").withRequired(true) .withArgument(abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1") .withShortName("t1").create(); Option t2Opt = obuilder.withLongName("t2").withRequired(true) .withArgument(abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2") .withShortName("t2").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt) .withOption(measureClassOpt).withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt) .withOption(helpOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String input = cmdLine.getValue(inputOpt).toString(); String output = cmdLine.getValue(outputOpt).toString(); String measureClass = SquaredEuclideanDistanceMeasure.class.getName(); if (cmdLine.hasOption(measureClassOpt)) { measureClass = cmdLine.getValue(measureClassOpt).toString(); } Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt) == false ? SparseVector.class : (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString()); double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString()); double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString()); runJob(input, output, measureClass, t1, t2, vectorClass); } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.clustering.canopy.ClusterDriver.java
/**
 * Command-line entry point for the cluster-assignment job.
 *
 * <p>Parses points, canopies, output, distance measure, vector class, t1 and t2,
 * then delegates to {@code runJob}. Prints help and exits on a parse failure.
 *
 * @param args command-line arguments
 * @throws IOException            if the underlying job fails on I/O
 * @throws ClassNotFoundException if the user-supplied vector class cannot be loaded
 */
public static void main(String[] args) throws IOException, ClassNotFoundException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option vectorClassOpt = obuilder.withLongName("vectorClass").withRequired(false)
            .withArgument(abuilder.withName("vectorClass").withMinimum(1).withMaximum(1).create())
            .withDescription("The Vector implementation class name. Default is SparseVector.class")
            .withShortName("v").create();
    Option t1Opt = obuilder.withLongName("t1").withRequired(true)
            .withArgument(abuilder.withName("t1").withMinimum(1).withMaximum(1).create()).withDescription("t1")
            .withShortName("t1").create();
    Option t2Opt = obuilder.withLongName("t2").withRequired(true)
            .withArgument(abuilder.withName("t2").withMinimum(1).withMaximum(1).create()).withDescription("t2")
            .withShortName("t2").create();
    Option pointsOpt = obuilder.withLongName("points").withRequired(true)
            .withArgument(abuilder.withName("points").withMinimum(1).withMaximum(1).create())
            .withDescription("The path containing the points").withShortName("p").create();
    Option canopiesOpt = obuilder.withLongName("canopies").withRequired(true)
            .withArgument(abuilder.withName("canopies").withMinimum(1).withMaximum(1).create())
            .withDescription("The location of the canopies, as a Path").withShortName("c").create();
    Option measureClassOpt = obuilder.withLongName("distance").withRequired(false)
            .withArgument(abuilder.withName("distance").withMinimum(1).withMaximum(1).create())
            .withDescription("The Distance Measure to use. Default is SquaredEuclidean").withShortName("m")
            .create();
    Option outputOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The Path to put the output in").withShortName("o").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(vectorClassOpt).withOption(t1Opt).withOption(t2Opt)
            .withOption(pointsOpt).withOption(canopiesOpt).withOption(measureClassOpt).withOption(outputOpt)
            .withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        // Distance measure defaults to SquaredEuclidean unless the user supplies one.
        String measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        if (cmdLine.hasOption(measureClassOpt)) {
            measureClass = cmdLine.getValue(measureClassOpt).toString();
        }
        String output = cmdLine.getValue(outputOpt).toString();
        String canopies = cmdLine.getValue(canopiesOpt).toString();
        String points = cmdLine.getValue(pointsOpt).toString();
        // Cast is unavoidable with Class.forName; scoped suppression keeps the warning local.
        @SuppressWarnings("unchecked")
        Class<? extends Vector> vectorClass = cmdLine.hasOption(vectorClassOpt)
                ? (Class<? extends Vector>) Class.forName(cmdLine.getValue(vectorClassOpt).toString())
                : SparseVector.class;
        double t1 = Double.parseDouble(cmdLine.getValue(t1Opt).toString());
        double t2 = Double.parseDouble(cmdLine.getValue(t2Opt).toString());
        runJob(points, canopies, output, measureClass, t1, t2, vectorClass);
    } catch (OptionException e) {
        // Bad arguments: log and show usage rather than dumping a stack trace to the user.
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}
From source file:org.apache.mahout.clustering.cdbw.CDbwDriver.java
/**
 * Command-line entry point for the CDbw clustering-evaluation driver.
 *
 * <p>Parses input/output paths, the model-distribution class, the iteration cap and
 * the reducer count, then delegates to {@code runJob}. Prints help and exits on a
 * parse failure.
 *
 * @param args command-line arguments
 * @throws Exception propagated from option parsing or the underlying job
 */
public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
    // Shared option definitions come from DefaultOptionCreator for consistency with other drivers.
    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option maxIterOpt = DefaultOptionCreator.maxIterationsOption().create();
    Option helpOpt = DefaultOptionCreator.helpOption();
    // NOTE(review): this option is marked required, yet its description advertises a default
    // and the code below carries a fallback branch — one of the two is redundant. Confirm
    // whether the option was meant to be optional before relying on the default.
    Option modelOpt = obuilder.withLongName("modelClass").withRequired(true).withShortName("d")
            .withArgument(abuilder.withName("modelClass").withMinimum(1).withMaximum(1).create())
            .withDescription("The ModelDistribution class name. "
                    + "Defaults to org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution")
            .create();
    Option numRedOpt = obuilder.withLongName("maxRed").withRequired(true).withShortName("r")
            .withArgument(abuilder.withName("maxRed").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of reduce tasks.").create();
    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt)
            .withOption(maxIterOpt).withOption(helpOpt).withOption(numRedOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }
        Path input = new Path(cmdLine.getValue(inputOpt).toString());
        Path output = new Path(cmdLine.getValue(outputOpt).toString());
        // Fallback default; unreachable in practice because modelOpt is required (see note above).
        String modelFactory = "org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution";
        if (cmdLine.hasOption(modelOpt)) {
            modelFactory = cmdLine.getValue(modelOpt).toString();
        }
        int numReducers = Integer.parseInt(cmdLine.getValue(numRedOpt).toString());
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        // Second argument (clustered-points path) is intentionally null here — TODO confirm
        // against runJob's signature; not visible from this chunk.
        runJob(input, null, output, modelFactory, maxIterations, numReducers);
    } catch (OptionException e) {
        // Bad arguments: log and show usage rather than dumping a stack trace.
        log.error("Exception parsing command line: ", e);
        CommandLineUtil.printHelp(group);
    }
}
From source file:org.apache.mahout.clustering.conversion.InputDriver.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create(); Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create(); Option vectorOpt = obuilder.withLongName("vector").withRequired(false) .withArgument(abuilder.withName("v").withMinimum(1).withMaximum(1).create()) .withDescription("The vector implementation to use.").withShortName("v").create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(vectorOpt) .withOption(helpOpt).create(); try {//from w w w .ja v a 2 s.co m Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString()); Path output = new Path(cmdLine.getValue(outputOpt, "output").toString()); String vectorClassName = cmdLine.getValue(vectorOpt, "org.apache.mahout.math.RandomAccessSparseVector") .toString(); runJob(input, output, vectorClassName); } catch (OptionException e) { log.error("Exception parsing command line: ", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0.java
/**
 * Programmatic entry point for in-memory CVB0 LDA training.
 *
 * <p>Builds the full CLI option set, parses {@code args}, loads the dictionary and
 * the corpus, trains an {@link InMemoryCollapsedVariationalBayes0} model until
 * convergence, and writes the topic-term and doc-topic outputs. Stage durations are
 * logged via {@code logTime}.
 *
 * @param args command-line arguments (see option descriptions below)
 * @param conf Hadoop configuration used for all filesystem access
 * @return -1 if help was requested; 0 otherwise (including after a handled parse error)
 * @throws Exception propagated from dictionary/corpus loading, training, or output writing
 */
public static int main2(String[] args, Configuration conf) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();
    // Required inputs: corpus directory, topic count, and both output files.
    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
                    + "one doc per line")
            .withShortName("i").create();
    Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
            .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();
    Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
            .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS namenode URI").withShortName("dfs").create();
    Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
            .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of topics to learn").withShortName("top").create();
    Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
            .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(term | topic)").withShortName("to").create();
    Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
            .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(topic | docid)").withShortName("do").create();
    // Optional hyper-parameters; each carries a string default parsed later.
    Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
            .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();
    Option etaOpt = obuilder.withLongName("eta").withRequired(false)
            .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();
    Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault("10").create())
            .withDescription("Maximum number of training passes").withShortName("m").create();
    Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
            .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
                    .withDefault("0.0").create())
            .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
            .create();
    Option burnInOpt = obuilder
            .withLongName("burnInIterations").withRequired(false).withArgument(abuilder
                    .withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault("5").create())
            .withDescription("Minimum number of iterations").withShortName("b").create();
    Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
            .withArgument(
                    abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
            .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c")
            .create();
    // Parsed below but its consumer is commented out — retained for a dormant feature.
    Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
            .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no")
                    .create())
            .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt")
            .create();
    Option numTrainThreadsOpt = obuilder
            .withLongName("numTrainThreads").withRequired(false).withArgument(abuilder
                    .withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
            .withDescription("number of threads to train with").withShortName("ntt").create();
    Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
            .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to update the model with").withShortName("nut").create();
    Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
            .withArgument(
                    abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
            .withDescription("print verbose information, like top-terms in each topic, during iteration")
            .withShortName("v").create();

    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
            .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
            .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
            .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
            .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
            .withOption(modelCorpusFractionOption).withOption(verboseOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        // Extract and convert every option value up front so failures happen before training.
        String inputDirString = (String) cmdLine.getValue(inputDirOpt);
        String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
        int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
        double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
        double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));
        int maxIterations = Integer.parseInt((String) cmdLine.getValue(maxIterOpt));
        int burnInIterations = Integer.parseInt((String) cmdLine.getValue(burnInOpt));
        double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
        int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
        int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
        String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
        String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
        //String reInferDocTopics = (String)cmdLine.getValue(reInferDocTopicsOpt);
        boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
        double modelCorpusFraction = Double.parseDouble((String) cmdLine.getValue(modelCorpusFractionOption));

        long start = System.nanoTime();
        // NOTE(review): if fs.default.name is unset AND --dfs was not given, this sets the
        // key to null — confirm whether Configuration.set tolerates that here.
        if (conf.get("fs.default.name") == null) {
            String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
            conf.set("fs.default.name", dfsNameNode);
        }
        String[] terms = loadDictionary(dictDirString, conf);
        logTime("dictionary loading", System.nanoTime() - start);
        start = System.nanoTime();
        Matrix corpus = loadVectors(inputDirString, conf);
        logTime("vector seqfile corpus loading", System.nanoTime() - start);
        start = System.nanoTime();
        InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
                numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction);
        logTime("cvb0 init", System.nanoTime() - start);
        start = System.nanoTime();
        cvb0.setVerbose(verbose);
        cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
        logTime("total training time", System.nanoTime() - start);
        // Dormant re-inference feature, preserved from the original:
        /* if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
             cvb0.inferDocuments(0.0, 100, true);
           } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
             cvb0.inferDocuments(0.0, 100, false);
           } */
        start = System.nanoTime();
        cvb0.writeModel(new Path(topicOutFile));
        DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
        logTime("printTopics", System.nanoTime() - start);
    } catch (OptionException e) {
        // Bad arguments: log and show usage; still returns 0 below.
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}