List of usage examples for org.apache.hadoop.conf.Configuration.setInt
public void setInt(String name, int value)

Set the value of the name property to an int.

Parameters:
name - property name
value - int value of the property
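Before the source-file examples below, here is a minimal, self-contained sketch of the usual setInt/getInt round trip. The property name "example.num.partitions" and the class name SetIntExample are illustrative placeholders, not taken from any of the sources listed.

import org.apache.hadoop.conf.Configuration;

public class SetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store an int under a property name; Configuration keeps it internally as a String.
        conf.setInt("example.num.partitions", 8);

        // Read it back with getInt, supplying a default used when the property is unset or malformed.
        int partitions = conf.getInt("example.num.partitions", 1);
        System.out.println("example.num.partitions = " + partitions); // prints 8
    }
}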
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static void setSequenceDictionary(Configuration conf, SAMSequenceDictionary dict)
        throws IOException, URISyntaxException {
    int counter = 0;
    for (SAMSequenceRecord seq : dict.getSequences()) {
        conf.set(dictionarySequenceName + counter, seq.getSequenceName());
        conf.setInt(dictionarySequenceLength + counter, seq.getSequenceLength());
        counter++;
    }
    conf.setInt(dictionaryCount, counter);
}
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static void setMinChrLength(Configuration conf, int val) {
    Logger.DEBUG("min chr size set to " + val, 3);
    conf.setInt(minChrSize, val);
}
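The two HalvadeConf setters above only write values into the job Configuration; a task later reads them back with the matching get methods. Below is a hypothetical sketch of that read side; the mapper class and the property key "halvade.min.chr.size" are illustrative placeholders, not part of HalvadeConf, which uses its own constant for the real property name.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MinChrLengthMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private int minChrSize;

    @Override
    protected void setup(Context context) {
        // Read the int back from the same Configuration the driver populated with setInt();
        // 0 is only a fallback default used if the property was never set.
        minChrSize = context.getConfiguration().getInt("halvade.min.chr.size", 0);
    }
}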
From source file:biglayer.AutoCoder.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }*/
    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);

    String inputPath = "qiwang321/MNIST-mingled-key/part*";
    String outputPath = "shangfu/layeroutput";

    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    Configuration conf = getConf();
    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    job0.setNumReduceTasks(reduceTasks);
    job0.getConfiguration().setInt("layer_ind", 0);

    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "_0"));

    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);
    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(ModelNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(ModelNode.class);
    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "_0");
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    long codeStart = System.currentTimeMillis();
    double codeTimeSum = 0;
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) {
        Job job1 = Job.getInstance(conf);
        job1.setJobName(AutoCoder.class.getSimpleName());
        job1.setJarByClass(AutoCoder.class);
        job1.setNumReduceTasks(reduceTasks);
        job1.getConfiguration().setInt("layer_ind", iterations);

        FileInputFormat.setInputPaths(job1, new Path(outputPath + "_" + (iterations - 1)));
        FileOutputFormat.setOutputPath(job1, new Path(outputPath + "_" + iterations + "_train"));

        LOG.info("Tool: " + AutoCoder.class.getSimpleName());
        LOG.info(" - input path: " + outputPath + "_" + (iterations - 1));
        LOG.info(" - output path: " + outputPath + "_" + iterations + "_train");
        LOG.info(" - number of reducers: " + reduceTasks);

        job1.setInputFormatClass(SequenceFileInputFormat.class);
        job1.setOutputFormatClass(SequenceFileOutputFormat.class);
        job1.setMapOutputKeyClass(PairOfInts.class);
        job1.setMapOutputValueClass(ModelNode.class);
        job1.setOutputKeyClass(PairOfInts.class);
        job1.setOutputValueClass(ModelNode.class);
        job1.setMapperClass(MyMapper.class);
        job1.setReducerClass(MyReducer_Train.class);
        job1.setPartitionerClass(MyPartitioner.class);

        // Delete the output directory if it exists already.
        outputDir = new Path(outputPath + "_" + iterations + "_train");
        FileSystem.get(getConf()).delete(outputDir, true);

        startTime = System.currentTimeMillis();
        job1.waitForCompletion(true);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

        Job job2 = Job.getInstance(conf);
        job2.setJobName(AutoCoder.class.getSimpleName());
        job2.setJarByClass(AutoCoder.class);
        job2.setNumReduceTasks(reduceTasks);
        job2.getConfiguration().setInt("layer_ind", iterations);

        FileInputFormat.setInputPaths(job2, new Path(outputPath + "_" + (iterations + "_train")));
        FileOutputFormat.setOutputPath(job2, new Path(outputPath + "_" + iterations));

        LOG.info("Tool: " + AutoCoder.class.getSimpleName());
        LOG.info(" - input path: " + outputPath + "_" + iterations + "_train");
        LOG.info(" - output path: " + outputPath + "_" + iterations);
        LOG.info(" - number of reducers: " + reduceTasks);

        job2.setInputFormatClass(SequenceFileInputFormat.class);
        job2.setOutputFormatClass(SequenceFileOutputFormat.class);
        job2.setMapOutputKeyClass(PairOfInts.class);
        job2.setMapOutputValueClass(ModelNode.class);
        job2.setOutputKeyClass(PairOfInts.class);
        job2.setOutputValueClass(ModelNode.class);
        job2.setMapperClass(MyMapper.class);
        job2.setReducerClass(MyReducer_GenData.class);
        job2.setPartitionerClass(MyPartitioner.class);

        // Delete the output directory if it exists already.
        outputDir = new Path(outputPath + "_" + iterations);
        FileSystem.get(getConf()).delete(outputDir, true);

        startTime = System.currentTimeMillis();
        job2.waitForCompletion(true);
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;
    }

    LOG.info(" - input path: " + outputPath + "_" + GlobalUtil.NUM_LAYER);
    LOG.info(" - output path: " + outputPath);
    reduceTasks = 1;
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job_super = Job.getInstance(conf);
    job_super.setJobName(AutoCoder.class.getSimpleName());
    job_super.setJarByClass(AutoCoder.class);
    job_super.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job_super, new Path(outputPath + "_" + GlobalUtil.NUM_LAYER));
    FileOutputFormat.setOutputPath(job_super, new Path(outputPath));

    job_super.setInputFormatClass(SequenceFileInputFormat.class);
    job_super.setOutputFormatClass(SequenceFileOutputFormat.class);
    job_super.setMapOutputKeyClass(PairOfInts.class);
    job_super.setMapOutputValueClass(ModelNode.class);
    job_super.setOutputKeyClass(NullWritable.class);
    job_super.setOutputValueClass(NullWritable.class);
    job_super.setMapperClass(MyMapper_Super.class);
    job_super.setReducerClass(MyReducer_Super.class);
    job_super.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job_super.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    codeTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    Log.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds, " + codeTimeSum
            + " seconds.");

    //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks);
    return 0;
}
From source file:bigsidemodel.AutoCoder.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }*/
    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);

    String inputPath = "qiwang321/best5-mingled-key-56x56/part*";
    String outputPath = "shangfu/bigoutput";

    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath + "0");
    LOG.info(" - number of reducers: " + reduceTasks);

    Configuration conf = getConf();
    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job0.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "0"));

    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);
    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(DataNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(DataNode.class);
    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "0");
    FileSystem.get(getConf()).delete(outputDir, true);

    long codeStart = System.currentTimeMillis();
    double jobTimeSum = 0;

    long startTime = System.currentTimeMillis();
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    //======= Job 1
    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + outputPath + "0");
    LOG.info(" - output path: " + outputPath + "1");
    LOG.info(" - number of reducers: " + 1);

    int nModel = reduceTasks;
    reduceTasks = 1;

    Job job1 = Job.getInstance(conf);
    job1.setJobName(AutoCoder.class.getSimpleName());
    job1.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job1.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job1, new Path(outputPath + "0"));
    FileOutputFormat.setOutputPath(job1, new Path(outputPath + "1"));

    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setMapOutputKeyClass(PairOfInts.class);
    job1.setMapOutputValueClass(DataNode.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(NullWritable.class);
    job1.setMapperClass(MyMapper1.class);
    job1.setReducerClass(MyReducer1.class);
    job1.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath + "1");
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds, " + jobTimeSum
            + " seconds.");

    return 0;
}
From source file:byte_import.HexastoreBulkImport.java
License:Open Source License
public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);
    } catch (IOException e2) {
        e2.printStackTrace();
    }
    return job;
}
From source file:ca.sparkera.adapters.mainframe.CobolSerdeUtils.java
License:Apache License
/**
 * Determine the layout that's been provided for cobol serde work.
 *
 * @param properties
 *            containing a key pointing to the layout, one way or another
 * @return layout to use while serdeing the avro file
 * @throws IOException
 *             if error while trying to read the layout from another location
 * @throws CobolSerdeException
 *             if unable to find a layout or pointer to it in the properties
 */
public static String determineLayoutOrThrowException(Configuration conf, Properties properties)
        throws IOException, CobolSerdeException {
    //For fixed length record get length of the file
    String fixedRecordLength = properties.getProperty(CobolTableProperties.FB_LENGTH.getPropName());
    if (fixedRecordLength != null) {
        conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, Integer.parseInt(fixedRecordLength));
    }

    String layoutString = properties.getProperty(CobolTableProperties.LAYOUT_LITERAL.getPropName());
    if (layoutString != null && !layoutString.equals(LAYOUT_NONE))
        return CobolSerdeUtils.getLayoutFor(layoutString);

    //For testing purpose
    layoutString = properties.getProperty(CobolTableProperties.LAYOUT_TEST.getPropName());
    if (layoutString != null) {
        return readFile(layoutString, Charset.defaultCharset());
    }

    // Try pulling directly from URL
    layoutString = properties.getProperty(CobolTableProperties.LAYOUT_URL.getPropName());
    if (layoutString == null || layoutString.equals(LAYOUT_NONE))
        throw new CobolSerdeException(EXCEPTION_MESSAGE);

    try {
        String s = getLayoutFromFS(layoutString, conf);
        if (s == null) {
            // in case layout is not a file system
            return CobolSerdeUtils.getLayoutFor(new URL(layoutString).openStream());
        }
        return s;
    } catch (IOException ioe) {
        throw new CobolSerdeException("Unable to read layout from given path: " + layoutString, ioe);
    } catch (URISyntaxException urie) {
        throw new CobolSerdeException("Unable to read layout from given path: " + layoutString, urie);
    }
}
From source file:ca.uwaterloo.cs.bigdata2017w.assignment4.BuildPersonalizedPageRankRecords.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("sources").hasArg().withDescription("source nodes").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sourcesString = cmdline.getOptionValue(SOURCES);
    String[] sources = sourcesString.split(",");
    for (int i = 0; i < sources.length; i++) {
        sources[i] = sources[i].trim();
    }

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info(" - use sources: " + sourcesString);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setStrings(SOURCES, sources);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);
    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static void addComparators(Configuration conf, String property, Map<String, Fields> map,
        BaseFlowStep flowStep, Group group) {
    Iterator<Fields> fieldsIterator = map.values().iterator();

    if (!fieldsIterator.hasNext())
        return;

    Fields fields = fieldsIterator.next();

    if (fields.hasComparators()) {
        conf.set(property, pack(fields, conf));
        return;
    }

    // use resolved fields if there are no comparators.
    Set<Scope> previousScopes = flowStep.getPreviousScopes(group);

    fields = previousScopes.iterator().next().getOutValuesFields();

    if (fields.size() != 0) // allows fields.UNKNOWN to be used
        conf.setInt(property + ".size", fields.size());
}
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(), context.getApplicationId().getId(), context.getTaskIndex(),
                    context.getTaskAttemptNumber(), isMapperOutput);

    config.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    config.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
}
From source file:cascading.platform.tez.Hadoop2TezPlatform.java
License:Open Source License
@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        // Current usage requirements:
        // 1. Clients need to set "tez.local.mode" to true when creating a TezClient instance. (For the examples this can be done via -Dtez.local.mode=true)
        // 2. fs.defaultFS must be set to "file:///"
        // 2.1 If running examples - this must be set in tez-site.xml (so that it's picked up by the client, as well as the conf instances used to configure the Inputs / Outputs).
        // 2.2 If using programatically (without a tez-site.xml present). All configuration instances used (to crate the client / configure Inputs / Outputs) - must have this property set.
        // 3. tez.runtime.optimize.local.fetch needs to be set to true (either via tez-site.xml or in all configurations used to create the job (similar to fs.defaultFS in step 2))
        // 4. tez.staging-dir must be set (either programatically or via tez-site.xml).
        // Until TEZ-1337 goes in - the staging-dir for the job is effectively the root of the filesystem (and where inputs are read from / written to if relative paths are used).
        LOG.info("not using cluster");
        configuration = new Configuration();

        configuration.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions());
        // configuration.setInt( FlowRuntimeProps.GATHER_PARTITIONS, 1 ); // deadlocks if larger than 1

        configuration.set(TezConfiguration.TEZ_LOCAL_MODE, "true");
        configuration.set("fs.defaultFS", "file:///");
        configuration.set("tez.runtime.optimize.local.fetch", "true");

        // hack to prevent deadlocks where downstream processors are scheduled before upstream
        configuration.setInt("tez.am.inline.task.execution.max-tasks", 3); // testHashJoinMergeIntoHashJoinAccumulatedAccumulatedMerge fails if set to 2

        configuration.set(TezConfiguration.TEZ_IGNORE_LIB_URIS, "true"); // in local mode, use local classpath

        configuration.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1);
        configuration.set(TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS, "true");

        configuration.set("tez.am.mode.session", "true"); // allows multiple TezClient instances to be used in a single jvm

        if (!Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            configuration.set("hadoop.tmp.dir", System.getProperty("hadoop.tmp.dir"));
        else
            configuration.set("hadoop.tmp.dir", "build/test/tmp");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored
        new File(System.getProperty("hadoop.tmp.dir")).mkdirs(); // ignored

        Configuration defaultConf = new Configuration();

        defaultConf.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions());

        defaultConf.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1);

        // defaultConf.set( TezConfiguration.TEZ_AM_LOG_LEVEL, "DEBUG" );
        // defaultConf.set( TezConfiguration.TEZ_TASK_LOG_LEVEL, "DEBUG" );

        defaultConf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
        defaultConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
        defaultConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, System.getProperty("hadoop.tmp.dir"));

        miniDFSCluster = new MiniDFSCluster.Builder(defaultConf).numDataNodes(4).format(true).racks(null).build();

        fileSys = miniDFSCluster.getFileSystem();

        Configuration tezConf = new Configuration(defaultConf);
        tezConf.set("fs.defaultFS", fileSys.getUri().toString()); // use HDFS
        tezConf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");

        // see MiniTezClusterWithTimeline as alternate
        miniTezCluster = new MiniTezCluster(getClass().getName(), 4, 1, 1); // todo: set to 4
        miniTezCluster.init(tezConf);
        miniTezCluster.start();

        configuration = miniTezCluster.getConfig();

        // stats won't work after completion unless ATS is used
        if (setTimelineStore(configuration)) // true if ats can be loaded and configured for this hadoop version
        {
            configuration.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS, ATSHistoryLoggingService.class.getName());
            configuration.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_ADDRESS, "localhost:10200");
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS, "localhost:8188");
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS, "localhost:8190");

            yarnHistoryServer = new ApplicationHistoryServer();
            yarnHistoryServer.init(configuration);
            yarnHistoryServer.start();
        }
    }

    configuration.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 1);
    configuration.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);
    configuration.setInt(TezConfiguration.TEZ_AM_MAX_TASK_FAILURES_PER_NODE, 1);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    Hadoop2TezPlanner.copyProperties(configuration, globalProperties); // copy any external properties
    Hadoop2TezPlanner.copyConfiguration(properties, configuration); // put all properties on the jobconf

    ExitUtil.disableSystemExit();
    // forbidSystemExitCall();
}