List of usage examples for org.apache.hadoop.conf.Configuration.setInt
public void setInt(String name, int value)
Sets the value of the name property to an int.
From source file: edu.isi.mavuno.app.distsim.ContextToPattern.java
License:Apache License
public int run() throws ClassNotFoundException, InterruptedException, IOException { Configuration conf = getConf(); String contextPath = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.ContextPath", conf); String corpusPath = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.CorpusPath", conf); String corpusClass = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.CorpusClass", conf); String extractorClass = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.ExtractorClass", conf); String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.ExtractorArgs", conf); String minMatches = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.MinMatches", conf); boolean harvestGlobalStats = Boolean .parseBoolean(MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.GlobalStats", conf)); String outputPath = MavunoUtils.getRequiredParam("Mavuno.ContextToPattern.OutputPath", conf); MavunoUtils.createDirectory(conf, outputPath); sLogger.info("Tool name: ContextToPattern"); sLogger.info(" - Context path: " + contextPath); sLogger.info(" - Corpus path: " + corpusPath); sLogger.info(" - Corpus class: " + corpusClass); sLogger.info(" - Extractor class: " + extractorClass); sLogger.info(" - Extractor args: " + extractorArgs); sLogger.info(" - Min matches: " + minMatches); sLogger.info(" - Harvest global stats: " + harvestGlobalStats); sLogger.info(" - Output path: " + outputPath); // set total terms path conf.set("Mavuno.TotalTermsPath", outputPath + "/totalTerms"); // split contexts into manageable chunks conf.set("Mavuno.Split.InputPath", contextPath); conf.set("Mavuno.Split.OutputPath", outputPath + "/contexts-split"); conf.set("Mavuno.Split.SplitKey", "context"); new Split(conf).run(); // get context splits FileStatus[] files = MavunoUtils.getDirectoryListing(conf, outputPath + "/contexts-split"); int split = 0; for (FileStatus file : files) { if (!file.getPath().getName().endsWith(".examples")) { continue; }//from w w w . j a v a2 s . 
c o m // extract patterns conf.set("Mavuno.Extract.InputPath", file.getPath().toString()); conf.set("Mavuno.Extract.CorpusPath", corpusPath); conf.set("Mavuno.Extract.CorpusClass", corpusClass); conf.set("Mavuno.Extract.ExtractorClass", extractorClass); conf.set("Mavuno.Extract.ExtractorArgs", extractorArgs); conf.set("Mavuno.Extract.ExtractorTarget", "pattern"); conf.set("Mavuno.Extract.MinMatches", minMatches); conf.set("Mavuno.Extract.OutputPath", outputPath + "/contexts-split/patterns/" + split); new Extract(conf).run(); // increment split split++; } // extract global pattern statistics if necessary if (harvestGlobalStats) { conf.set("Mavuno.ExtractGlobalStats.InputPath", outputPath + "/contexts-split/patterns/"); conf.set("Mavuno.ExtractGlobalStats.CorpusPath", corpusPath); conf.set("Mavuno.ExtractGlobalStats.CorpusClass", corpusClass); conf.set("Mavuno.ExtractGlobalStats.ExtractorClass", extractorClass); conf.set("Mavuno.ExtractGlobalStats.ExtractorArgs", extractorArgs); conf.set("Mavuno.ExtractGlobalStats.ExtractorTarget", "pattern"); conf.set("Mavuno.ExtractGlobalStats.OutputPath", outputPath + "/contexts-split/pattern-stats/"); new ExtractGlobalStats(conf).run(); } // combine context splits conf.set("Mavuno.CombineSplits.ExamplesPath", outputPath + "/contexts-split/patterns"); conf.set("Mavuno.CombineSplits.ExampleStatsPath", outputPath + "/contexts-split/pattern-stats"); conf.set("Mavuno.CombineSplits.SplitKey", "context"); conf.setInt("Mavuno.CombineSplits.TotalSplits", split); conf.set("Mavuno.CombineSplits.OutputPath", outputPath + "/pattern-stats"); new CombineSplits(conf).run(); // delete context splits MavunoUtils.removeDirectory(conf, outputPath + "/contexts-split"); return 0; }
From source file:edu.isi.mavuno.app.distsim.PatternToContext.java
License:Apache License
public int run() throws ClassNotFoundException, InterruptedException, IOException { Configuration conf = getConf(); String patternPath = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.PatternPath", conf); String corpusPath = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.CorpusPath", conf); String corpusClass = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.CorpusClass", conf); String extractorClass = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.ExtractorClass", conf); String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.ExtractorArgs", conf); String minMatches = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.MinMatches", conf); boolean harvestGlobalStats = Boolean .parseBoolean(MavunoUtils.getRequiredParam("Mavuno.PatternToContext.GlobalStats", conf)); String outputPath = MavunoUtils.getRequiredParam("Mavuno.PatternToContext.OutputPath", conf); MavunoUtils.createDirectory(conf, outputPath); sLogger.info("Tool name: PatternToContext"); sLogger.info(" - Pattern path: " + patternPath); sLogger.info(" - Corpus path: " + corpusPath); sLogger.info(" - Corpus class: " + corpusClass); sLogger.info(" - Extractor class: " + extractorClass); sLogger.info(" - Extractor args: " + extractorArgs); sLogger.info(" - Min matches: " + minMatches); sLogger.info(" - Harvest global stats: " + harvestGlobalStats); sLogger.info(" - Output path: " + outputPath); // set total terms path conf.set("Mavuno.TotalTermsPath", outputPath + "/totalTerms"); // split patterns into manageable chunks conf.set("Mavuno.Split.InputPath", patternPath); conf.set("Mavuno.Split.OutputPath", outputPath + "/patterns-split"); conf.set("Mavuno.Split.SplitKey", "pattern"); new Split(conf).run(); // get pattern splits FileStatus[] files = MavunoUtils.getDirectoryListing(conf, outputPath + "/patterns-split"); int split = 0; for (FileStatus file : files) { if (!file.getPath().getName().endsWith(".examples")) { continue; }// w w w . j av a2 s. 
c o m // extract contexts conf.set("Mavuno.Extract.InputPath", file.getPath().toString()); conf.set("Mavuno.Extract.CorpusPath", corpusPath); conf.set("Mavuno.Extract.CorpusClass", corpusClass); conf.set("Mavuno.Extract.ExtractorClass", extractorClass); conf.set("Mavuno.Extract.ExtractorArgs", extractorArgs); conf.set("Mavuno.Extract.ExtractorTarget", "context"); conf.set("Mavuno.Extract.MinMatches", minMatches); conf.set("Mavuno.Extract.OutputPath", outputPath + "/patterns-split/contexts/" + split); new Extract(conf).run(); // increment split split++; } // extract global context statistics if necessary if (harvestGlobalStats) { conf.set("Mavuno.ExtractGlobalStats.InputPath", outputPath + "/patterns-split/contexts/"); conf.set("Mavuno.ExtractGlobalStats.CorpusPath", corpusPath); conf.set("Mavuno.ExtractGlobalStats.CorpusClass", corpusClass); conf.set("Mavuno.ExtractGlobalStats.ExtractorClass", extractorClass); conf.set("Mavuno.ExtractGlobalStats.ExtractorArgs", extractorArgs); conf.set("Mavuno.ExtractGlobalStats.ExtractorTarget", "context"); conf.set("Mavuno.ExtractGlobalStats.OutputPath", outputPath + "/patterns-split/context-stats/"); new ExtractGlobalStats(conf).run(); } // combine pattern splits conf.set("Mavuno.CombineSplits.ExamplesPath", outputPath + "/patterns-split/contexts"); conf.set("Mavuno.CombineSplits.ExampleStatsPath", outputPath + "/patterns-split/context-stats"); conf.set("Mavuno.CombineSplits.SplitKey", "pattern"); conf.setInt("Mavuno.CombineSplits.TotalSplits", split); conf.set("Mavuno.CombineSplits.OutputPath", outputPath + "/context-stats"); new CombineSplits(conf).run(); // delete pattern splits MavunoUtils.removeDirectory(conf, outputPath + "/patterns-split"); return 0; }
From source file:edu.isi.mavuno.app.distsim.PatternToPattern.java
License:Apache License
public int run() throws ClassNotFoundException, InterruptedException, IOException { Configuration conf = getConf(); String patternPath = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.PatternPath", conf); String corpusClass = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.CorpusClass", conf); String corpusPath = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.CorpusPath", conf); String extractorClass = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.ExtractorClass", conf); String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.ExtractorArgs", conf); int minMatches = Integer.parseInt(MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.MinMatches", conf)); boolean harvestGlobalStats = Boolean .parseBoolean(MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.GlobalStats", conf)); String outputPath = MavunoUtils.getRequiredParam("Mavuno.PatternToPattern.OutputPath", conf); MavunoUtils.createDirectory(conf, outputPath); sLogger.info("Tool name: PatternToPattern"); sLogger.info(" - Pattern path: " + patternPath); sLogger.info(" - Corpus class: " + corpusClass); sLogger.info(" - Corpus path: " + corpusPath); sLogger.info(" - Output path: " + outputPath); sLogger.info(" - Context class: " + extractorClass); sLogger.info(" - Context arguments: " + extractorArgs); sLogger.info(" - Min matches: " + minMatches); sLogger.info(" - Harvest global stats: " + harvestGlobalStats); // pattern to context conf.set("Mavuno.PatternToContext.PatternPath", patternPath); conf.set("Mavuno.PatternToContext.CorpusPath", corpusPath); conf.set("Mavuno.PatternToContext.CorpusClass", corpusClass); conf.set("Mavuno.PatternToContext.ExtractorClass", extractorClass); conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorArgs); conf.setInt("Mavuno.PatternToContext.MinMatches", minMatches); conf.setBoolean("Mavuno.PatternToContext.GlobalStats", harvestGlobalStats); conf.set("Mavuno.PatternToContext.OutputPath", outputPath); new 
PatternToContext(conf).run(); // context to pattern conf.set("Mavuno.ContextToPattern.ContextPath", outputPath + "/context-stats"); conf.set("Mavuno.ContextToPattern.CorpusPath", corpusPath); conf.set("Mavuno.ContextToPattern.CorpusClass", corpusClass); conf.set("Mavuno.ContextToPattern.ExtractorClass", extractorClass); conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorArgs); conf.setInt("Mavuno.ContextToPattern.MinMatches", minMatches); conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", harvestGlobalStats); conf.set("Mavuno.ContextToPattern.OutputPath", outputPath); new ContextToPattern(conf).run(); return 0;/*from w ww .j a v a 2 s.com*/ }
From source file:edu.isi.mavuno.app.ie.HarvestEspressoContexts.java
License:Apache License
public int run() throws ClassNotFoundException, InterruptedException, IOException { Configuration conf = getConf(); String inputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.InputPath", conf); String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.CorpusPath", conf); String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.CorpusClass", conf); String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ExtractorClass", conf); String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ExtractorArgs", conf); String scorerClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ScorerClass", conf); String scorerArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.ScorerArgs", conf); int numPatterns = Integer .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.NumPatterns", conf)); int minMatches = Integer .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.MinMatches", conf)); String baseOutputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoContexts.OutputPath", conf); String numIterations = MavunoUtils.getOptionalParam("Mavuno.HarvestEspressoContexts.NumIterations", conf); int iterations = 1; if (numIterations != null) { iterations = Integer.parseInt(numIterations); }// w ww .j a va2 s .c o m MavunoUtils.createDirectory(conf, baseOutputPath); sLogger.info("Tool name: HarvestEspressoContexts"); sLogger.info(" - Input path: " + inputPath); sLogger.info(" - Corpus path: " + corpusPath); sLogger.info(" - Corpus class: " + corpusClass); sLogger.info(" - Extractor class: " + extractorClass); sLogger.info(" - Extractor args: " + extractorArgs); sLogger.info(" - Scorer class: " + scorerClass); sLogger.info(" - Scorer args: " + scorerArgs); sLogger.info(" - Number of patterns: " + numPatterns); sLogger.info(" - Minimum matches: " + minMatches); sLogger.info(" - Iterations: " + 
iterations); sLogger.info(" - Output path: " + baseOutputPath); // initial sub output path MavunoUtils.createDirectory(conf, baseOutputPath + "/0"); MavunoUtils.createDirectory(conf, baseOutputPath + "/0/contexts-scored"); // examples -> sequence file conf.set("Mavuno.ExamplesToSequenceFile.InputPath", inputPath); conf.set("Mavuno.ExamplesToSequenceFile.OutputPath", baseOutputPath + "/0/contexts-scored/scored-contexts-raw"); new ExamplesToSequenceFile(conf).run(); // iterate procedure for (int i = 1; i <= iterations; i++) { // previous output path (input to current iteration) String prevOutputPath = baseOutputPath + "/" + (i - 1); // current output path String curOutputPath = baseOutputPath + "/" + i; MavunoUtils.createDirectory(conf, curOutputPath); // seeds -> patterns conf.set("Mavuno.ContextToPattern.ContextPath", prevOutputPath + "/contexts-scored/scored-contexts-raw"); conf.set("Mavuno.ContextToPattern.CorpusPath", corpusPath); conf.set("Mavuno.ContextToPattern.CorpusClass", corpusClass); conf.set("Mavuno.ContextToPattern.ExtractorClass", extractorClass); conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorArgs); conf.setInt("Mavuno.ContextToPattern.MinMatches", minMatches); conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", true); conf.set("Mavuno.ContextToPattern.OutputPath", curOutputPath + "/patterns"); new ContextToPattern(conf).run(); // score patterns conf.set("Mavuno.ComputePatternScores.InputPath", curOutputPath + "/patterns"); conf.set("Mavuno.ComputePatternScores.ContextScorerClass", null); conf.set("Mavuno.ComputePatternScores.PatternScorerClass", scorerClass); conf.set("Mavuno.ComputePatternScores.PatternScorerArgs", scorerArgs); conf.set("Mavuno.ComputePatternScores.OutputPath", curOutputPath + "/patterns-scored"); new ComputePatternScores(conf).run(); // only retain top-(k * i) patterns if (numPatterns >= 0) { conf.set("Mavuno.GetTopResults.InputPath", curOutputPath + "/patterns-scored/scored-patterns"); 
conf.set("Mavuno.GetTopResults.OutputPath", curOutputPath + "/patterns-scored-top"); conf.setInt("Mavuno.GetTopResults.NumResults", numPatterns * i); conf.setBoolean("Mavuno.GetTopResults.SequenceFileOutputFormat", true); new GetTopResults(conf).run(); } // patterns -> contexts if (numPatterns >= 0) { conf.set("Mavuno.PatternToContext.PatternPath", curOutputPath + "/patterns-scored-top"); } else { conf.set("Mavuno.PatternToContext.PatternPath", curOutputPath + "/patterns-scored/scored-patterns-raw"); } conf.set("Mavuno.PatternToContext.CorpusPath", corpusPath); conf.set("Mavuno.PatternToContext.CorpusClass", corpusClass); conf.set("Mavuno.PatternToContext.ExtractorClass", extractorClass); conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorArgs); conf.setInt("Mavuno.PatternToContext.MinMatches", minMatches); conf.setBoolean("Mavuno.PatternToContext.GlobalStats", true); conf.set("Mavuno.PatternToContext.OutputPath", curOutputPath + "/contexts"); new PatternToContext(conf).run(); // score contexts conf.set("Mavuno.ComputeContextScores.InputPath", curOutputPath + "/contexts"); conf.set("Mavuno.ComputeContextScores.PatternScorerClass", null); conf.set("Mavuno.ComputeContextScores.ContextScorerClass", scorerClass); conf.set("Mavuno.ComputeContextScores.ContextScorerArgs", scorerArgs); conf.set("Mavuno.ComputeContextScores.OutputPath", curOutputPath + "/contexts-scored"); new ComputeContextScores(conf).run(); // delete previous output path MavunoUtils.removeDirectory(conf, prevOutputPath); } return 0; }
From source file:edu.isi.mavuno.app.ie.HarvestEspressoPatterns.java
License:Apache License
public int run() throws ClassNotFoundException, InterruptedException, IOException { Configuration conf = getConf(); String inputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.InputPath", conf); String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.CorpusPath", conf); String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.CorpusClass", conf); String extractorClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ExtractorClass", conf); String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ExtractorArgs", conf); String scorerClass = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ScorerClass", conf); String scorerArgs = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.ScorerArgs", conf); int numContexts = Integer .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.NumContexts", conf)); int minMatches = Integer .parseInt(MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.MinMatches", conf)); String baseOutputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestEspressoPatterns.OutputPath", conf); String numIterations = MavunoUtils.getOptionalParam("Mavuno.HarvestEspressoPatterns.NumIterations", conf); int iterations = 1; if (numIterations != null) { iterations = Integer.parseInt(numIterations); }//from w w w. 
ja v a 2 s .c o m MavunoUtils.createDirectory(conf, baseOutputPath); sLogger.info("Tool name: HarvestEspressoPatterns"); sLogger.info(" - Input path: " + inputPath); sLogger.info(" - Corpus path: " + corpusPath); sLogger.info(" - Corpus class: " + corpusClass); sLogger.info(" - Extractor class: " + extractorClass); sLogger.info(" - Extractor args: " + extractorArgs); sLogger.info(" - Scorer class: " + scorerClass); sLogger.info(" - Scorer args: " + scorerArgs); sLogger.info(" - Number of contexts: " + numContexts); sLogger.info(" - Minimum matches: " + minMatches); sLogger.info(" - Iterations: " + iterations); sLogger.info(" - Output path: " + baseOutputPath); // initial sub output path MavunoUtils.createDirectory(conf, baseOutputPath + "/0"); MavunoUtils.createDirectory(conf, baseOutputPath + "/0/patterns-scored"); // patterns -> sequence file conf.set("Mavuno.ExamplesToSequenceFile.InputPath", inputPath); conf.set("Mavuno.ExamplesToSequenceFile.OutputPath", baseOutputPath + "/0/patterns-scored/scored-patterns-raw"); new ExamplesToSequenceFile(conf).run(); // iterate procedure for (int i = 1; i <= iterations; i++) { // previous output path (input to current iteration) String prevOutputPath = baseOutputPath + "/" + (i - 1); // current output path String curOutputPath = baseOutputPath + "/" + i; MavunoUtils.createDirectory(conf, curOutputPath); // seeds -> contexts conf.set("Mavuno.PatternToContext.PatternPath", prevOutputPath + "/patterns-scored/scored-patterns-raw"); conf.set("Mavuno.PatternToContext.CorpusPath", corpusPath); conf.set("Mavuno.PatternToContext.CorpusClass", corpusClass); conf.set("Mavuno.PatternToContext.ExtractorClass", extractorClass); conf.set("Mavuno.PatternToContext.ExtractorArgs", extractorArgs); conf.setInt("Mavuno.PatternToContext.MinMatches", minMatches); conf.setBoolean("Mavuno.PatternToContext.GlobalStats", true); conf.set("Mavuno.PatternToContext.OutputPath", curOutputPath + "/contexts"); new PatternToContext(conf).run(); // score 
contexts conf.set("Mavuno.ComputeContextScores.InputPath", curOutputPath + "/contexts"); conf.set("Mavuno.ComputeContextScores.PatternScorerClass", null); conf.set("Mavuno.ComputeContextScores.ContextScorerClass", scorerClass); conf.set("Mavuno.ComputeContextScores.ContextScorerArgs", scorerArgs); conf.set("Mavuno.ComputeContextScores.OutputPath", curOutputPath + "/contexts-scored"); new ComputeContextScores(conf).run(); // only retain top-(k * i) contexts if (numContexts >= 0) { conf.set("Mavuno.GetTopResults.InputPath", curOutputPath + "/contexts-scored/scored-contexts"); conf.set("Mavuno.GetTopResults.OutputPath", curOutputPath + "/contexts-scored-top"); conf.setInt("Mavuno.GetTopResults.NumResults", numContexts * i); conf.setBoolean("Mavuno.GetTopResults.SequenceFileOutputFormat", true); new GetTopResults(conf).run(); } // contexts -> patterns if (numContexts >= 0) { conf.set("Mavuno.ContextToPattern.ContextPath", curOutputPath + "/contexts-scored-top"); } else { conf.set("Mavuno.ContextToPattern.ContextPath", curOutputPath + "/contexts-scored/scored-contexts-raw"); } conf.set("Mavuno.ContextToPattern.CorpusPath", corpusPath); conf.set("Mavuno.ContextToPattern.CorpusClass", corpusClass); conf.set("Mavuno.ContextToPattern.ExtractorClass", extractorClass); conf.set("Mavuno.ContextToPattern.ExtractorArgs", extractorArgs); conf.setInt("Mavuno.ContextToPattern.MinMatches", minMatches); conf.setBoolean("Mavuno.ContextToPattern.GlobalStats", true); conf.set("Mavuno.ContextToPattern.OutputPath", curOutputPath + "/patterns"); new ContextToPattern(conf).run(); // score patterns conf.set("Mavuno.ComputePatternScores.InputPath", curOutputPath + "/patterns"); conf.set("Mavuno.ComputePatternScores.ContextScorerClass", null); conf.set("Mavuno.ComputePatternScores.PatternScorerClass", scorerClass); conf.set("Mavuno.ComputePatternScores.PatternScorerArgs", scorerArgs); conf.set("Mavuno.ComputePatternScores.OutputPath", curOutputPath + "/patterns-scored"); new 
ComputePatternScores(conf).run(); // delete previous output path MavunoUtils.removeDirectory(conf, prevOutputPath); } return 0; }
From source file:edu.isi.mavuno.extract.ExtractGlobalStats.java
License:Apache License
/**
 * Extracts global statistics for a set of examples.
 *
 * <p>The examples at {@code Mavuno.ExtractGlobalStats.InputPath} are split with {@code Split}
 * into {@code <output>/../split}. One MapReduce job over the corpus is then run for every
 * {@code .examples} split file, writing block-compressed sequence files to
 * {@code <output>/../split/<n>}. The per-split output is merged with {@code CombineGlobalStats}
 * and the temporary split directory is removed.
 *
 * <p>A failed split job aborts the run with an {@link IOException} instead of silently feeding
 * incomplete output into the combine step. The temporary split directory is left in place in
 * that case.
 *
 * @return 0 when all jobs completed successfully
 * @throws ClassNotFoundException if the configured corpus class cannot be loaded, or if one of
 *     the invoked tools throws it
 * @throws InterruptedException if a job or one of the invoked tools is interrupted
 * @throws IOException if a split job fails, or if one of the invoked helpers or tools throws it
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.InputPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.CorpusClass", conf);
    String extractorClass = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorClass", conf);
    String extractorArgs = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorArgs", conf);
    String extractorTarget = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorTarget", conf)
            .toLowerCase();
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.OutputPath", conf);

    // split examples
    conf.set("Mavuno.Split.InputPath", inputPath);
    conf.set("Mavuno.Split.OutputPath", outputPath + "/../split");
    conf.set("Mavuno.Split.SplitKey", extractorTarget);
    new Split(conf).run();

    // get splits
    FileStatus[] files = MavunoUtils.getDirectoryListing(conf, outputPath + "/../split");

    int split = 0;
    for (FileStatus file : files) {
        // only ".examples" files are processed
        if (!file.getPath().getName().endsWith(".examples")) {
            continue;
        }

        conf.set("Mavuno.ExtractGlobalStats.ExamplesPath", file.getPath().toString());

        sLogger.info("Tool name: ExtractGlobalStats");
        sLogger.info(" - Input path: " + inputPath);
        sLogger.info(" - Examples path: " + file.getPath());
        sLogger.info(" - Example split: " + split);
        sLogger.info(" - Corpus path: " + corpusPath);
        sLogger.info(" - Corpus class: " + corpusClass);
        sLogger.info(" - Extractor class: " + extractorClass);
        // this line used to repeat the "Extractor class" label for the extractor arguments
        sLogger.info(" - Extractor args: " + extractorArgs);
        sLogger.info(" - Extractor target: " + extractorTarget);
        sLogger.info(" - Output path: " + outputPath);

        Job job = new Job(conf);
        job.setJobName("ExtractGlobalStats");

        MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath + "/../split/" + split));

        // the corpus class name comes from configuration, hence the unchecked cast
        job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        job.setMapOutputKeyClass(ContextPatternWritable.class);
        job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
        job.setPartitionerClass(ContextPatternWritable.FullPartitioner.class);
        job.setMapOutputValueClass(ContextPatternStatsWritable.class);

        job.setOutputKeyClass(ContextPatternWritable.class);
        job.setOutputValueClass(ContextPatternStatsWritable.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // fail fast: combining the splits is pointless if one of them is missing
        if (!job.waitForCompletion(true)) {
            throw new IOException("ExtractGlobalStats job failed for split " + split
                    + " (examples: " + file.getPath() + ")");
        }

        split++;
    }

    // combine splits
    conf.setInt("Mavuno.CombineGlobalStats.TotalSplits", split);
    conf.set("Mavuno.CombineGlobalStats.InputPath", outputPath + "/../split/");
    conf.set("Mavuno.CombineGlobalStats.OutputPath", outputPath);
    new CombineGlobalStats(conf).run();

    MavunoUtils.removeDirectory(conf, outputPath + "/../split");

    return 0;
}
From source file:edu.iu.ccd.CCDLauncher.java
License:Apache License
private Job configureCCDJob(Path inputDir, int r, double lambda, int numIterations, int numMapTasks, int numThreadsPerWorker, int numModelSlices, Path modelDir, Path outputDir, String testFilePath, Configuration configuration, int jobID) throws IOException, URISyntaxException { configuration.setInt(Constants.R, r); configuration.setDouble(Constants.LAMBDA, lambda); configuration.setInt(Constants.NUM_ITERATIONS, numIterations); configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker); System.out.println("Model Dir Path: " + modelDir.toString()); configuration.set(Constants.MODEL_DIR, modelDir.toString()); configuration.setInt(Constants.NUM_MODEL_SLICES, numModelSlices); configuration.set(Constants.TEST_FILE_PATH, testFilePath); Job job = Job.getInstance(configuration, "ccd_job_" + jobID); JobConf jobConf = (JobConf) job.getConfiguration(); jobConf.set("mapreduce.framework.name", "map-collective"); jobConf.setNumMapTasks(numMapTasks); jobConf.setInt("mapreduce.job.max.split.locations", 10000); FileInputFormat.setInputPaths(job, inputDir); FileOutputFormat.setOutputPath(job, outputDir); job.setInputFormatClass(MultiFileInputFormat.class); job.setJarByClass(CCDLauncher.class); job.setMapperClass(CCDMPCollectiveMapper.class); job.setNumReduceTasks(0);//from w w w . j ava 2s .co m return job; }
From source file:edu.iu.daal_adaboost.ADABOOSTDaalLauncher.java
License:Apache License
/** * Launches all the tasks in order./*ww w .java 2 s .c o m*/ */ @Override public int run(String[] args) throws Exception { /* Put shared libraries into the distributed cache */ Configuration conf = this.getConf(); Initialize init = new Initialize(conf, args); /* Put shared libraries into the distributed cache */ init.loadDistributedLibs(); // load args init.loadSysArgs(); //load app args conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum()])); conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1])); conf.set(HarpDAALConstants.TEST_FILE_PATH, args[init.getSysArgNum() + 2]); // launch job System.out.println("Starting Job"); long perJobSubmitTime = System.currentTimeMillis(); System.out.println( "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); Job adaboostJob = init.createJob("AdaboostJob", ADABOOSTDaalLauncher.class, ADABOOSTDaalCollectiveMapper.class); // finish job boolean jobSuccess = adaboostJob.waitForCompletion(true); System.out.println( "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); System.out.println( "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |"); if (!jobSuccess) { adaboostJob.killJob(); System.out.println("Adaboost Job failed"); } return 0; }
From source file:edu.iu.daal_als.ALSDaalLauncher.java
License:Apache License
/** * Launches ALS workers./*from w w w .ja v a 2s . c om*/ */ @Override public int run(String[] args) throws Exception { /* Put shared libraries into the distributed cache */ Configuration conf = this.getConf(); Initialize init = new Initialize(conf, args); /* Put shared libraries into the distributed cache */ init.loadDistributedLibs(); // load args init.loadSysArgs(); conf.setInt(HarpDAALConstants.NUM_FACTOR, Integer.parseInt(args[init.getSysArgNum()])); conf.setDouble(Constants.ALPHA, Double.parseDouble(args[init.getSysArgNum() + 1])); conf.setDouble(Constants.LAMBDA, Double.parseDouble(args[init.getSysArgNum() + 2])); conf.set(HarpDAALConstants.TEST_FILE_PATH, args[init.getSysArgNum() + 3]); // launch job System.out.println("Starting Job"); long perJobSubmitTime = System.currentTimeMillis(); System.out.println( "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); Job alsJob = init.createJob("alsJob", ALSDaalLauncher.class, ALSDaalCollectiveMapper.class); // finish job boolean jobSuccess = alsJob.waitForCompletion(true); System.out.println( "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); System.out.println( "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |"); if (!jobSuccess) { alsJob.killJob(); System.out.println("alsJob failed"); } return 0; }
From source file:edu.iu.daal_als_batch.ALSBatchDaalLauncher.java
License:Apache License
/** * Launches all the tasks in order.//from ww w .j a v a 2 s . c o m */ @Override public int run(String[] args) throws Exception { /* Put shared libraries into the distributed cache */ Configuration conf = this.getConf(); Initialize init = new Initialize(conf, args); /* Put shared libraries into the distributed cache */ init.loadDistributedLibs(); // load args init.loadSysArgs(); //load app args conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()])); conf.setInt(HarpDAALConstants.NUM_FACTOR, Integer.parseInt(args[init.getSysArgNum() + 1])); // launch job System.out.println("Starting Job"); long perJobSubmitTime = System.currentTimeMillis(); System.out.println( "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); Job alsbatchJob = init.createJob("AlsbatchJob", ALSBatchDaalLauncher.class, ALSBatchDaalCollectiveMapper.class); // finish job boolean jobSuccess = alsbatchJob.waitForCompletion(true); System.out.println( "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime())); System.out.println( "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |"); if (!jobSuccess) { alsbatchJob.killJob(); System.out.println("ALSbatch Job failed"); } return 0; }