List of usage examples for org.apache.hadoop.conf Configuration setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.

Parameters:
name - the property name
value - the boolean value to set
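Before the project examples below, a minimal sketch of the call in isolation (the property key "my.feature.enabled" is an illustrative name, not a real Hadoop property):

Configuration conf = new Configuration();
conf.setBoolean("my.feature.enabled", true);    // stored internally as the string "true"
// getBoolean reads the value back; the second argument is the default used when the property is unset
boolean enabled = conf.getBoolean("my.feature.enabled", false);    // true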
From source file:kogiri.common.hadoop.io.format.fasta.FastaReadDescriptionInputFormat.java
License:Open Source License
public static void setSplitable(Configuration conf, boolean splitable) {
    conf.setBoolean(CONF_SPLITABLE, splitable);
}
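The setter above is one half of a common Hadoop idiom: a static helper writes a boolean flag into the Configuration, and the input format reads it back when deciding whether a file may be split. A minimal sketch of the read side, assuming the same CONF_SPLITABLE key; this companion getter is hypothetical, modeled on the setter above rather than taken from the source file:

public static boolean isSplitable(Configuration conf) {
    // read the flag back; the second argument is the default when the property was never set (assumed true here)
    return conf.getBoolean(CONF_SPLITABLE, true);
}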
From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java
License:Apache License
/**
 * Modify configuration according to user-specified generic options.
 *
 * @param conf
 *            Configuration to be modified
 * @param line
 *            User-specified generic options
 */
private void processGeneralOptions(Configuration conf, CommandLine line) throws IOException {
    if (line.hasOption("fs")) {
        FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
    }
    if (line.hasOption("jt")) {
        conf.set("mapred.job.tracker", line.getOptionValue("jt"));
    }
    if (line.hasOption("conf")) {
        String[] values = line.getOptionValues("conf");
        for (String value : values) {
            conf.addResource(new Path(value));
        }
    }
    if (line.hasOption("libjars")) {
        conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf));
        // setting libjars in client classpath
        URL[] libjars = getLibJars(conf);
        if (libjars != null && libjars.length > 0) {
            conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
            Thread.currentThread().setContextClassLoader(
                    new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
        }
    }
    if (line.hasOption("files")) {
        conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf));
    }
    if (line.hasOption("archives")) {
        conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf));
    }
    if (line.hasOption('D')) {
        String[] property = line.getOptionValues('D');
        for (String prop : property) {
            String[] keyval = prop.split("=", 2);
            if (keyval.length == 2) {
                conf.set(keyval[0], keyval[1]);
            }
        }
    }
    conf.setBoolean("mapred.used.genericoptionsparser", true);
    // tokensFile
    if (line.hasOption("tokenCacheFile")) {
        String fileName = line.getOptionValue("tokenCacheFile");
        // check if the local file exists
        try {
            FileSystem localFs = FileSystem.getLocal(conf);
            Path p = new Path(fileName);
            if (!localFs.exists(p)) {
                throw new FileNotFoundException("File " + fileName + " does not exist.");
            }
            LOG.debug("setting conf tokensFile: {}", fileName);
            conf.set("mapreduce.job.credentials.json", localFs.makeQualified(p).toString());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:ml.shifu.guagua.mapreduce.example.nn.NNMapReduceClient.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 6) {
        throw new IllegalArgumentException(
                "NNMapReduceClient: Must have at least 6 arguments <guagua.iteration.count> <guagua.zk.servers> <nn.test.scale> <nn.record.scales> <input path or folder> <guagua.nn.output>. ");
    }
    conf.set(GuaguaConstants.WORKER_COMPUTABLE_CLASS, NNWorker.class.getName());
    conf.set(GuaguaConstants.MASTER_COMPUTABLE_CLASS, NNMaster.class.getName());
    conf.set(GuaguaConstants.GUAGUA_ITERATION_COUNT, otherArgs[0]);
    conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, otherArgs[1]);
    conf.set(NNConstants.NN_TEST_SCALE, otherArgs[2]);
    conf.set(NNConstants.NN_RECORD_SCALE, otherArgs[3]);
    conf.set(GuaguaConstants.GUAGUA_MASTER_RESULT_CLASS, NNParams.class.getName());
    conf.set(GuaguaConstants.GUAGUA_WORKER_RESULT_CLASS, NNParams.class.getName());
    conf.setInt(NNConstants.GUAGUA_NN_INPUT_NODES, NNConstants.GUAGUA_NN_DEFAULT_INPUT_NODES);
    conf.setInt(NNConstants.GUAGUA_NN_HIDDEN_NODES, NNConstants.GUAGUA_NN_DEFAULT_HIDDEN_NODES);
    conf.setInt(NNConstants.GUAGUA_NN_OUTPUT_NODES, NNConstants.GUAGUA_NN_DEFAULT_OUTPUT_NODES);
    conf.set(NNConstants.GUAGUA_NN_ALGORITHM, NNConstants.GUAGUA_NN_DEFAULT_ALGORITHM);
    conf.setInt(NNConstants.GUAGUA_NN_THREAD_COUNT, NNConstants.GUAGUA_NN_DEFAULT_THREAD_COUNT);
    conf.set(NNConstants.GUAGUA_NN_LEARNING_RATE, NNConstants.GUAGUA_NN_DEFAULT_LEARNING_RATE);
    conf.set(NNConstants.GUAGUA_NN_OUTPUT, otherArgs[5]);
    conf.set(GuaguaConstants.GUAGUA_MASTER_INTERCEPTERS, NNOutput.class.getName());
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, 3600000);
    conf.setInt(GuaguaMapReduceConstants.IO_SORT_MB, 0);
    Job job = new Job(conf, "Guagua NN Master-Workers Job");
    job.setJarByClass(NNMapReduceClient.class);
    job.setMapperClass(GuaguaMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(NNInputFormat.class);
    job.setOutputFormatClass(GuaguaOutputFormat.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(otherArgs[4]));
    job.waitForCompletion(true);
}
From source file:ml.shifu.guagua.mapreduce.example.sum.SumMapReduceClient.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 3) {
        throw new IllegalArgumentException(
                "SumMapReduceClient: Must have at least 3 arguments <guagua.iteration.count> <guagua.zk.servers> <input path or folder>. ");
    }
    conf.set(GuaguaConstants.WORKER_COMPUTABLE_CLASS, SumWorker.class.getName());
    conf.set(GuaguaConstants.MASTER_COMPUTABLE_CLASS, SumMaster.class.getName());
    conf.set(GuaguaConstants.GUAGUA_ITERATION_COUNT, otherArgs[0]);
    conf.set(GuaguaConstants.GUAGUA_ZK_SERVERS, otherArgs[1]);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_SESSION_TIMEOUT, 300 * 1000);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_MAX_ATTEMPTS, 5);
    conf.setInt(GuaguaConstants.GUAGUA_ZK_RETRY_WAIT_MILLS, 1000);
    // if you set the result class to a Hadoop Writable, you must use GuaguaWritableSerializer;
    // this can be avoided by using GuaguaMapReduceClient
    conf.set(GuaguaConstants.GUAGUA_MASTER_RESULT_CLASS, LongWritable.class.getName());
    conf.set(GuaguaConstants.GUAGUA_WORKER_RESULT_CLASS, LongWritable.class.getName());
    conf.set(GuaguaConstants.GUAGUA_MASTER_IO_SERIALIZER, "ml.shifu.guagua.mapreduce.GuaguaWritableSerializer");
    conf.set(GuaguaConstants.GUAGUA_WORKER_IO_SERIALIZER, "ml.shifu.guagua.mapreduce.GuaguaWritableSerializer");
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, 3600000);
    conf.setInt(GuaguaMapReduceConstants.IO_SORT_MB, 0);
    Job job = new Job(conf, "Guagua Sum Master-Workers Job");
    job.setJarByClass(SumMapReduceClient.class);
    job.setMapperClass(GuaguaMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(GuaguaInputFormat.class);
    job.setOutputFormatClass(GuaguaOutputFormat.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, new Path(otherArgs[2]));
    job.waitForCompletion(true);
}
From source file:ml.shifu.guagua.mapreduce.GuaguaMapReduceClient.java
License:Apache License
/**
 * Create Hadoop job according to arguments from main.
 */
@SuppressWarnings("deprecation")
public synchronized Job createJob(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // set it here so it can be overwritten; set task timeout to a long period of 30 minutes
    conf.setInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT,
            conf.getInt(GuaguaMapReduceConstants.MAPRED_TASK_TIMEOUT, 1800000));
    GuaguaOptionsParser parser = new GuaguaOptionsParser(conf, args);
    // work around a bug on hdp 2.2.4
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (hdpVersion != null && hdpVersion.length() != 0) {
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    CommandLine cmdLine = parser.getCommandLine();
    checkInputSetting(conf, cmdLine);
    checkZkServerSetting(conf, cmdLine);
    checkWorkerClassSetting(conf, cmdLine);
    checkMasterClassSetting(conf, cmdLine);
    checkIterationCountSetting(conf, cmdLine);
    checkResultClassSetting(conf, cmdLine);
    String name = checkMapReduceNameSetting(cmdLine);
    @SuppressWarnings("rawtypes")
    Class<? extends InputFormat> inputFormatClass = checkInputFormatSetting(cmdLine);
    // set map reduce parameters for the specified master-workers architecture;
    // speculative execution should be disabled
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, false);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, false);
    // set mapreduce.job.max.split.locations to at least 100 to suppress warnings
    int maxSplits = conf.getInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 100);
    if (maxSplits < 100) {
        maxSplits = 100;
    }
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, maxSplits);
    // Set io.sort.mb to 0; with no reduce phase the map-side sort buffer is not needed.
    conf.setInt(GuaguaMapReduceConstants.IO_SORT_MB, 0);
    // Most users won't hit this hopefully and can set it higher if desired
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_COUNTERS_LIMIT,
            conf.getInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_COUNTERS_LIMIT, 512));
    conf.setInt(GuaguaMapReduceConstants.MAPRED_JOB_REDUCE_MEMORY_MB, 0);
    // append concurrent gc to avoid long gc stop-the-world pauses
    String childJavaOpts = conf.get(GuaguaMapReduceConstants.MAPRED_CHILD_JAVA_OPTS, "");
    if (childJavaOpts == null || childJavaOpts.length() == 0) {
        conf.set(GuaguaMapReduceConstants.MAPRED_CHILD_JAVA_OPTS,
                GuaguaMapReduceConstants.MAPRED_DEFAULT_CHILD_JAVA_OPTS);
    } else {
        String newChildJavaOpts = GuaguaMapReduceConstants.MAPRED_DEFAULT_CHILD_JAVA_OPTS + " " + childJavaOpts;
        conf.set(GuaguaMapReduceConstants.MAPRED_CHILD_JAVA_OPTS, newChildJavaOpts.trim());
    }
    Job job = new Job(conf, name);
    job.setJarByClass(GuaguaMapReduceClient.class);
    job.setMapperClass(GuaguaMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(GuaguaOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
}
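Note that createJob returns the fully configured Job without submitting it, so the caller submits and waits itself. A minimal usage sketch, assuming the caller passes the same command-line arguments the GuaguaOptionsParser expects (this driver method is illustrative, not taken from the source file):

public static void main(String[] args) throws Exception {
    GuaguaMapReduceClient client = new GuaguaMapReduceClient();
    Job job = client.createJob(args);    // builds the configured Job as shown above
    job.waitForCompletion(true);         // submit and block until the job finishes
}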
From source file:ml.shifu.shifu.core.processor.InitModelProcessor.java
License:Apache License
private Map<Integer, Long> getApproxDistinctCountByMRJob()
        throws IOException, InterruptedException, ClassNotFoundException {
    SourceType source = this.modelConfig.getDataSet().getSource();
    Configuration conf = new Configuration();
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars() });
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME,
            Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 30);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set("mapred.reduce.slowstart.completed.maps",
            Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.9"));
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for hdp 2.2.4, hdp.version should be set and configuration files should be added to the container class path
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Shifu: Column Type Auto Checking Job : " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());
    job.setMapperClass(AutoTypeDistinctCountMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.modelConfig.getDataSetRawPath())));
    job.setReducerClass(AutoTypeDistinctCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    String autoTypePath = super.getPathFinder().getAutoTypeFilePath(source);
    FileOutputFormat.setOutputPath(job, new Path(autoTypePath));
    // clean the output path first
    ShifuFileUtils.deleteFile(autoTypePath, source);
    // submit the job
    if (job.waitForCompletion(true)) {
        return getDistinctCountMap(source, autoTypePath);
    } else {
        throw new RuntimeException("MapReduce Job Auto Type Distinct Count failed.");
    }
}
From source file:ml.shifu.shifu.core.processor.PostTrainModelProcessor.java
License:Apache License
private void runMRBinAvgScoreJob(SourceType source, String postTrainOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration conf = new Configuration();
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars() });
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.set(Constants.SHIFU_STATS_EXLCUDE_MISSING,
            Environment.getProperty(Constants.SHIFU_STATS_EXLCUDE_MISSING, "true"));
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME,
            Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // set mapreduce.job.max.split.locations to 5000 to suppress warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set("mapred.reduce.slowstart.completed.maps",
            Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.8"));
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for hdp 2.2.4, hdp.version should be set and configuration files should be added to the container class path
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {
        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Shifu: Post Train : " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());
    job.setMapperClass(PostTrainMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FeatureStatsWritable.class);
    job.setInputFormatClass(CombineInputFormat.class);
    FileInputFormat.setInputPaths(job, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.modelConfig.getDataSetRawPath())));
    MultipleOutputs.addNamedOutput(job, Constants.POST_TRAIN_OUTPUT_SCORE, TextOutputFormat.class,
            NullWritable.class, Text.class);
    job.setReducerClass(PostTrainReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(postTrainOutputPath));
    // clean the output path first
    ShifuFileUtils.deleteFile(postTrainOutputPath, source);
    // submit the job
    if (!job.waitForCompletion(true)) {
        throw new RuntimeException("Post train Bin Avg Score MapReduce job failed.");
    }
}
From source file:ml.shifu.shifu.core.processor.PostTrainModelProcessor.java
License:Apache License
private void runMRFeatureImportanceJob(SourceType source, String output)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration conf = new Configuration();
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars() });
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.set(Constants.SHIFU_STATS_EXLCUDE_MISSING,
            Environment.getProperty(Constants.SHIFU_STATS_EXLCUDE_MISSING, "true"));
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME,
            Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // set mapreduce.job.max.split.locations to 5000 to suppress warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set("mapred.reduce.slowstart.completed.maps",
            Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.8"));
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for hdp 2.2.4, hdp.version should be set and configuration files should be added to the container class path
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {
        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Shifu: Post Train FeatureImportance : " + this.modelConfig.getModelSetName());
    job.setJarByClass(getClass());
    job.setMapperClass(FeatureImportanceMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(CombineInputFormat.class);
    FileInputFormat.setInputPaths(job, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.modelConfig.getDataSetRawPath())));
    job.setReducerClass(FeatureImportanceReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(output));
    // clean the output path first
    ShifuFileUtils.deleteFile(output, source);
    // submit the job
    if (!job.waitForCompletion(true)) {
        throw new RuntimeException("Post train Feature Importance MapReduce job failed.");
    }
}
From source file:ml.shifu.shifu.core.processor.stats.MapReducerStatsWorker.java
License:Apache License
private void prepareJobConf(RawSourceData.SourceType source, final Configuration conf, String filePath)
        throws IOException {
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars(), "-files", filePath });
    conf.setBoolean(CombineInputFormat.SHIFU_VS_SPLIT_COMBINABLE, true);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.set(Constants.SHIFU_STATS_EXLCUDE_MISSING,
            Environment.getProperty(Constants.SHIFU_STATS_EXLCUDE_MISSING, "true"));
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_MAP_SPECULATIVE, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPREDUCE_REDUCE_SPECULATIVE, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(this.pathFinder.getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(this.pathFinder.getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME,
            Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // set mapreduce.job.max.split.locations to 5000 to suppress warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 5000);
    conf.set("mapred.reduce.slowstart.completed.maps",
            Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.8"));
    conf.set(Constants.SHIFU_STATS_FILTER_EXPRESSIONS, super.modelConfig.getSegmentFilterExpressionsAsString());
    log.info("Segment filter expressions: {}", super.modelConfig.getSegmentFilterExpressionsAsString());
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for hdp 2.2.4, hdp.version should be set and configuration files should be added to the container class path
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
    // one can set guagua conf in shifuconfig
    CommonUtils.injectHadoopShifuEnvironments(new ValueVisitor() {
        @Override
        public void inject(Object key, Object value) {
            conf.set(key.toString(), value.toString());
        }
    });
}
From source file:ml.shifu.shifu.core.processor.StatsModelProcessor.java
License:Apache License
private void prepareJobConf(SourceType source, Configuration conf, String filePath) throws IOException {
    // add jars to hadoop mapper and reducer
    new GenericOptionsParser(conf, new String[] { "-libjars", addRuntimeJars(), "-files", filePath });
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_MAP_TASKS_SPECULATIVE_EXECUTION, true);
    conf.setBoolean(GuaguaMapReduceConstants.MAPRED_REDUCE_TASKS_SPECULATIVE_EXECUTION, true);
    conf.set(Constants.SHIFU_MODEL_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getModelConfigPath(source))).toString());
    conf.set(Constants.SHIFU_COLUMN_CONFIG, ShifuFileUtils.getFileSystemBySourceType(source)
            .makeQualified(new Path(super.getPathFinder().getColumnConfigPath(source))).toString());
    conf.set(NNConstants.MAPRED_JOB_QUEUE_NAME,
            Environment.getProperty(Environment.HADOOP_JOB_QUEUE, "default"));
    conf.set(Constants.SHIFU_MODELSET_SOURCE_TYPE, source.toString());
    // set mapreduce.job.max.split.locations to 30 to suppress warnings
    conf.setInt(GuaguaMapReduceConstants.MAPREDUCE_JOB_MAX_SPLIT_LOCATIONS, 30);
    conf.set("mapred.reduce.slowstart.completed.maps",
            Environment.getProperty("mapred.reduce.slowstart.completed.maps", "0.9"));
    String hdpVersion = HDPUtils.getHdpVersionForHDP224();
    if (StringUtils.isNotBlank(hdpVersion)) {
        // for hdp 2.2.4, hdp.version should be set and configuration files should be added to the container class path
        conf.set("hdp.version", hdpVersion);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("hdfs-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("core-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("mapred-site.xml"), conf);
        HDPUtils.addFileToClassPath(HDPUtils.findContainingFile("yarn-site.xml"), conf);
    }
}