List of usage examples for org.apache.hadoop.conf.Configuration.setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
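Before the project examples below, here is a minimal, self-contained sketch of the call pair; the property key "my.feature.enabled" is a made-up name for illustration:

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean under the given property name.
        conf.setBoolean("my.feature.enabled", true);

        // Read it back; the second argument is the default returned when the key is unset.
        boolean enabled = conf.getBoolean("my.feature.enabled", false);
        System.out.println("enabled = " + enabled); // prints: enabled = true
    }
}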
From source file: org.apache.gobblin.compaction.mapreduce.CompactionJobConfigurator.java
License: Apache License

/**
 * Customized MR job creation for Avro.
 *
 * @param dataset A path or directory which needs compaction
 * @return A configured map-reduce job for avro compaction
 */
public Job createJob(FileSystemDataset dataset) throws IOException {
    Configuration conf = HadoopUtils.getConfFromState(state);

    // Turn on mapreduce output compression by default
    if (conf.get("mapreduce.output.fileoutputformat.compress") == null
            && conf.get("mapred.output.compress") == null) {
        conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    }

    // Disable delegation token cancellation by default
    if (conf.get("mapreduce.job.complete.cancel.delegation.tokens") == null) {
        conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
    }

    addJars(conf, this.state, fs);
    Job job = Job.getInstance(conf);
    job.setJobName(MRCompactorJobRunner.HADOOP_JOB_NAME);
    boolean emptyDirectoryFlag = this.configureInputAndOutputPaths(job, dataset);
    if (emptyDirectoryFlag) {
        this.state.setProp(HiveRegistrationPolicy.MAPREDUCE_JOB_INPUT_PATH_EMPTY_KEY, true);
    }
    this.configureMapper(job);
    this.configureReducer(job);

    if (emptyDirectoryFlag || !this.shouldDeduplicate) {
        job.setNumReduceTasks(0);
    }

    // Configure schema at the last step because FilesInputFormat will be used internally
    this.configureSchema(job);
    this.isJobCreated = true;
    this.configuredJob = job;
    return job;
}
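Note the idiom above: setBoolean is called only when neither the current key nor its deprecated alias has been set, so an explicit user setting always wins over the code's default. A minimal sketch of that pattern as a standalone helper (the method name setBooleanDefault is hypothetical):

// Apply a boolean default only if neither the key nor any of its
// deprecated aliases was set explicitly in the configuration.
static void setBooleanDefault(Configuration conf, boolean value,
        String key, String... deprecatedAliases) {
    if (conf.get(key) != null) {
        return;
    }
    for (String alias : deprecatedAliases) {
        if (conf.get(alias) != null) {
            return;
        }
    }
    conf.setBoolean(key, value);
}

For the single-key case, Configuration also provides setBooleanIfUnset(name, value) with the same semantics.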
From source file: org.apache.gobblin.compaction.mapreduce.MRCompactorJobRunner.java
License: Apache License

@Override
public void run() {
    Configuration conf = HadoopUtils.getConfFromState(this.dataset.jobProps());

    // Turn on mapreduce output compression by default
    if (conf.get("mapreduce.output.fileoutputformat.compress") == null
            && conf.get("mapred.output.compress") == null) {
        conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    }

    // Disable delegation token cancellation by default
    if (conf.get("mapreduce.job.complete.cancel.delegation.tokens") == null) {
        conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
    }

    try {
        DateTime compactionTimestamp = getCompactionTimestamp();
        LOG.info("MR Compaction Job Timestamp " + compactionTimestamp.getMillis());
        if (this.dataset.jobProps().getPropAsBoolean(MRCompactor.COMPACTION_JOB_LATE_DATA_MOVEMENT_TASK, false)) {
            List<Path> newLateFilePaths = Lists.newArrayList();
            for (String filePathString : this.dataset.jobProps()
                    .getPropAsList(MRCompactor.COMPACTION_JOB_LATE_DATA_FILES)) {
                if (FilenameUtils.isExtension(filePathString, getApplicableFileExtensions())) {
                    newLateFilePaths.add(new Path(filePathString));
                }
            }
            Path lateDataOutputPath = this.outputDeduplicated ? this.dataset.outputLatePath()
                    : this.dataset.outputPath();
            LOG.info(String.format("Copying %d late data files to %s", newLateFilePaths.size(),
                    lateDataOutputPath));
            if (this.outputDeduplicated) {
                if (!this.fs.exists(lateDataOutputPath)) {
                    if (!this.fs.mkdirs(lateDataOutputPath)) {
                        throw new RuntimeException(String.format(
                                "Failed to create late data output directory: %s.",
                                lateDataOutputPath.toString()));
                    }
                }
            }
            this.copyDataFiles(lateDataOutputPath, newLateFilePaths);
            if (this.outputDeduplicated) {
                dataset.checkIfNeedToRecompact(datasetHelper);
            }
            this.status = Status.COMMITTED;
        } else {
            if (this.fs.exists(this.dataset.outputPath()) && !canOverwriteOutputDir()) {
                LOG.warn(String.format("Output paths %s exists. Will not compact %s.",
                        this.dataset.outputPath(), this.dataset.inputPaths()));
                this.status = Status.COMMITTED;
                return;
            }
            addJars(conf);
            Job job = Job.getInstance(conf);
            this.configureJob(job);
            this.submitAndWait(job);
            if (shouldPublishData(compactionTimestamp)) {
                // remove all invalid empty files due to speculative task execution
                List<Path> goodPaths = CompactionJobConfigurator.getGoodFiles(job,
                        this.dataset.outputTmpPath(), this.tmpFs, ImmutableList.of("avro"));

                if (!this.recompactAllData && this.recompactFromDestPaths) {
                    // append new files without deleting output directory
                    addGoodFilesToOutputPath(goodPaths);
                    // clean up late data from outputLateDirectory, which has been set to inputPath
                    deleteFilesByPaths(this.dataset.inputPaths());
                } else {
                    moveTmpPathToOutputPath();
                    if (this.recompactFromDestPaths) {
                        deleteFilesByPaths(this.dataset.additionalInputPaths());
                    }
                }
                submitSlaEvent(job);
                LOG.info("Successfully published data for input folder " + this.dataset.inputPaths());
                this.status = Status.COMMITTED;
            } else {
                LOG.info("Data not published for input folder " + this.dataset.inputPaths()
                        + " due to incompleteness");
                this.status = Status.ABORTED;
                return;
            }
        }
        if (renameSourceDir) {
            MRCompactor.renameSourceDirAsCompactionComplete(this.fs, this.dataset);
        } else {
            this.markOutputDirAsCompleted(compactionTimestamp);
        }
        this.submitRecordsCountsEvent();
    } catch (Throwable t) {
        throw Throwables.propagate(t);
    }
}
From source file: org.apache.hadoop.examples.BaileyBorweinPlouffe.java
License: Apache License

/** Create and setup a job */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file: org.apache.hadoop.examples.pi.DistSum.java
License: Apache License

/** Create a job */
private Job createJob(String name, Summation sigma) throws IOException {
    final Job job = Job.getInstance(getConf(), parameters.remoteDir + "/" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(DistSum.class);
    jobconf.setInt(N_PARTS, parameters.nParts);
    SummationWritable.write(sigma, DistSum.class, jobconf);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
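The two Hadoop examples above (BaileyBorweinPlouffe and DistSum) share the same tail: they disable the per-task timeout and turn off speculative execution, since their long-running tasks must not be killed or duplicated. Isolated into a helper, assuming a Job has already been created, the fragment would look like this sketch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

static void disableTimeoutAndSpeculation(Job job) {
    Configuration jobconf = job.getConfiguration();
    // A value of 0 disables the per-task timeout.
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);
    // With speculation off, the framework will not launch duplicate
    // attempts of slow tasks, which matters when attempts have side effects.
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
}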
From source file: org.apache.hama.bsp.TestCheckpoint.java
License: Apache License

public void testCheckpointInterval() throws Exception {
    Configuration config = new Configuration();
    System.setProperty("user.dir", "/tmp");
    config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName());
    config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName());
    config.setBoolean(Constants.FAULT_TOLERANCE_FLAG, true);
    config.setBoolean(Constants.CHECKPOINT_ENABLED, true);
    config.setInt(Constants.CHECKPOINT_INTERVAL, 2);
    config.set("bsp.output.dir", "/tmp/hama-test_out");
    config.set("bsp.local.dir", "/tmp/hama-test");

    FileSystem dfs = FileSystem.get(config);
    BSPJob job = new BSPJob(new BSPJobID("checkpttest", 1), "/tmp");
    TaskAttemptID taskId = new TaskAttemptID(new TaskID(job.getJobID(), 1), 1);

    TestMessageManager messenger = new TestMessageManager();
    PeerSyncClient syncClient = SyncServiceFactory.getPeerSyncClient(config);

    @SuppressWarnings("rawtypes")
    BSPPeer bspTask = new TestBSPPeer(job, config, taskId, new Counters(), -1L,
            (BSPPeerSyncClient) syncClient, messenger, TaskStatus.State.RUNNING);

    assertNotNull("BSPPeerImpl should not be null.", bspTask);

    LOG.info("Created bsp peer and other parameters");
    int port = BSPNetUtils.getFreePort(12502);
    LOG.info("Got port = " + port);

    boolean result = syncClient.getInformation(
            syncClient.constructKey(job.getJobID(), "checkpoint", "" + bspTask.getPeerIndex()),
            new ArrayWritable(LongWritable.class));
    assertFalse(result);

    bspTask.sync();
    // Superstep 1
    checkSuperstepMsgCount(syncClient, bspTask, job, 1L, 0L);

    Text txtMessage = new Text("data");
    messenger.addMessage(txtMessage);

    bspTask.sync();
    // Superstep 2
    checkSuperstepMsgCount(syncClient, bspTask, job, 1L, 0L);

    messenger.addMessage(txtMessage);

    bspTask.sync();
    // Superstep 3
    checkSuperstepMsgCount(syncClient, bspTask, job, 3L, 1L);

    bspTask.sync();
    // Superstep 4
    checkSuperstepMsgCount(syncClient, bspTask, job, 3L, 1L);

    messenger.addMessage(txtMessage);
    messenger.addMessage(txtMessage);

    bspTask.sync();
    // Superstep 5
    checkSuperstepMsgCount(syncClient, bspTask, job, 5L, 2L);

    bspTask.sync();
    // Superstep 6
    checkSuperstepMsgCount(syncClient, bspTask, job, 5L, 2L);

    dfs.delete(new Path("checkpoint"), true);
}
From source file: org.apache.hama.bsp.TestCheckpoint.java
License: Apache License

@SuppressWarnings("rawtypes")
public void testCheckpoint() throws Exception {
    Configuration config = new Configuration();
    config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName());
    config.setBoolean(Constants.FAULT_TOLERANCE_FLAG, true);
    config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName());
    config.setBoolean(Constants.CHECKPOINT_ENABLED, true);
    int port = BSPNetUtils.getFreePort(12502);
    LOG.info("Got port = " + port);
    config.set(Constants.PEER_HOST, Constants.DEFAULT_PEER_HOST);
    config.setInt(Constants.PEER_PORT, port);
    config.set("bsp.output.dir", "/tmp/hama-test_out");
    config.set("bsp.local.dir", "/tmp/hama-test");

    FileSystem dfs = FileSystem.get(config);
    BSPJob job = new BSPJob(new BSPJobID("checkpttest", 1), "/tmp");
    TaskAttemptID taskId = new TaskAttemptID(new TaskID(job.getJobID(), 1), 1);

    TestMessageManager messenger = new TestMessageManager();
    PeerSyncClient syncClient = SyncServiceFactory.getPeerSyncClient(config);

    BSPPeer bspTask = new TestBSPPeer(job, config, taskId, new Counters(), -1L,
            (BSPPeerSyncClient) syncClient, messenger, TaskStatus.State.RUNNING);

    assertNotNull("BSPPeerImpl should not be null.", bspTask);

    LOG.info("Created bsp peer and other parameters");

    @SuppressWarnings("unused")
    FaultTolerantPeerService<Text> service = null;

    bspTask.sync();
    LOG.info("Completed first sync.");

    checkSuperstepMsgCount(syncClient, bspTask, job, 1L, 0L);

    Text txtMessage = new Text("data");
    messenger.addMessage(txtMessage);

    bspTask.sync();
    LOG.info("Completed second sync.");

    checkSuperstepMsgCount(syncClient, bspTask, job, 2L, 1L);

    // Checking the messages for superstep 2 and peer id 1
    String expectedPath = "checkpoint/job_checkpttest_0001/2/1";
    FSDataInputStream in = dfs.open(new Path(expectedPath));
    String className = in.readUTF();
    Text message = (Text) ReflectionUtils.newInstance(Class.forName(className), config);
    message.readFields(in);

    assertEquals("data", message.toString());

    dfs.delete(new Path("checkpoint"), true);
}
From source file: org.apache.hama.bsp.TestCheckpoint.java
License: Apache License

public void testPeerRecovery() throws Exception {
    Configuration config = new Configuration();
    config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName());
    config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName());
    config.setBoolean(Constants.CHECKPOINT_ENABLED, true);
    int port = BSPNetUtils.getFreePort(12502);
    LOG.info("Got port = " + port);
    config.set(Constants.PEER_HOST, Constants.DEFAULT_PEER_HOST);
    config.setInt(Constants.PEER_PORT, port);
    config.set("bsp.output.dir", "/tmp/hama-test_out");
    config.set("bsp.local.dir", "/tmp/hama-test");

    FileSystem dfs = FileSystem.get(config);
    BSPJob job = new BSPJob(new BSPJobID("checkpttest", 1), "/tmp");
    TaskAttemptID taskId = new TaskAttemptID(new TaskID(job.getJobID(), 1), 1);

    TestMessageManager messenger = new TestMessageManager();
    PeerSyncClient syncClient = SyncServiceFactory.getPeerSyncClient(config);

    Text txtMessage = new Text("data");
    String writeKey = "job_checkpttest_0001/checkpoint/1/";

    Writable[] writableArr = new Writable[2];
    writableArr[0] = new LongWritable(3L);
    writableArr[1] = new LongWritable(5L);
    ArrayWritable arrWritable = new ArrayWritable(LongWritable.class);
    arrWritable.set(writableArr);
    syncClient.storeInformation(writeKey, arrWritable, true, null);

    String writePath = "checkpoint/job_checkpttest_0001/3/1";
    FSDataOutputStream out = dfs.create(new Path(writePath));
    for (int i = 0; i < 5; ++i) {
        out.writeUTF(txtMessage.getClass().getCanonicalName());
        txtMessage.write(out);
    }
    out.close();

    @SuppressWarnings("unused")
    BSPPeer<?, ?, ?, ?, Text> bspTask = new TestBSPPeer(job, config, taskId, new Counters(), 3L,
            (BSPPeerSyncClient) syncClient, messenger, TaskStatus.State.RECOVERING);

    BSPMessageBundle<Text> bundleRead = messenger.getLoopbackBundle();
    assertEquals(5, bundleRead.size());

    String recoveredMsg = bundleRead.iterator().next().toString();
    assertEquals(recoveredMsg, "data");
    dfs.delete(new Path("checkpoint"), true);
}
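All three TestCheckpoint cases start from the same boolean switches. Distilled from the tests above, the checkpoint-related configuration on its own looks like this (the constants are Hama's Constants class, exactly as used in the tests):

Configuration config = new Configuration();
config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName());
config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName());
config.setBoolean(Constants.FAULT_TOLERANCE_FLAG, true); // turn on fault tolerance
config.setBoolean(Constants.CHECKPOINT_ENABLED, true);   // turn on message checkpointing
config.setInt(Constants.CHECKPOINT_INTERVAL, 2);         // checkpoint every 2nd superstep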
From source file: org.apache.hama.bsp.TestPartitioning.java
License: Apache License

public void testPartitioner() throws Exception {
    Configuration conf = new Configuration();
    conf.set("bsp.local.dir", "/tmp/hama-test/partitioning");
    conf.setBoolean("bsp.input.runtime.partitioning", true);
    BSPJob bsp = new BSPJob(new HamaConfiguration(conf));
    bsp.setJobName("Test partitioning with input");
    bsp.setBspClass(PartionedBSP.class);
    bsp.setNumBspTask(2);
    conf.setInt(Constants.ZOOKEEPER_SESSION_TIMEOUT, 600);
    bsp.setInputFormat(TextInputFormat.class);
    bsp.setOutputFormat(NullOutputFormat.class);
    FileInputFormat.setInputPaths(bsp, "../CHANGES.txt,../README.md");
    bsp.setPartitioner(HashPartitioner.class);
    assertTrue(bsp.waitForCompletion(true));

    FileSystem fs = FileSystem.get(conf);
    fs.delete(OUTPUT_PATH, true);
}
From source file: org.apache.hama.ml.kmeans.KMeansBSP.java
License: Apache License

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length < 6) {
        LOG.info("USAGE: <INPUT_PATH> <OUTPUT_PATH> <COUNT> <K> <DIMENSION OF VECTORS> <MAXITERATIONS> <optional: num of tasks>");
        return;
    }

    Configuration conf = new Configuration();
    int count = Integer.parseInt(args[2]);
    int k = Integer.parseInt(args[3]);
    int dimension = Integer.parseInt(args[4]);
    int iterations = Integer.parseInt(args[5]);
    conf.setInt(MAX_ITERATIONS_KEY, iterations);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    Path center = new Path(in, "center/cen.seq");
    Path centerOut = new Path(out, "center/center_output.seq");
    conf.set(CENTER_IN_PATH, center.toString());
    conf.set(CENTER_OUT_PATH, centerOut.toString());

    // if you're in local mode, you can increase this to match your core sizes
    conf.set("bsp.local.tasks.maximum", "" + Runtime.getRuntime().availableProcessors());
    // deactivate (set to false) if you want to iterate over disk, else it will
    // cache the input vectors in memory
    conf.setBoolean(CACHING_ENABLED_KEY, true);

    BSPJob job = createJob(conf, in, out, false);

    LOG.info("N: " + count + " k: " + k + " Dimension: " + dimension + " Iterations: " + iterations);

    FileSystem fs = FileSystem.get(conf);
    // prepare the input, like deleting old versions and creating centers
    prepareInput(count, k, dimension, conf, in, center, out, fs);

    if (args.length == 7) {
        job.setNumBspTask(Integer.parseInt(args[6]));
    }

    // just submit the job
    job.waitForCompletion(true);
}
From source file: org.apache.hama.pipes.Submitter.java
License: Apache License

/**
 * Set whether to keep the command file for debugging.
 *
 * @param conf the configuration to modify
 * @param keep the new value
 */
public static void setKeepCommandFile(Configuration conf, boolean keep) {
    conf.setBoolean("hama.pipes.command-file.keep", keep);
}
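A consumer of this flag would read it back with getBoolean and a default; a matching getter would look like the sketch below (illustrative, not necessarily Hama's exact source):

/**
 * Check whether the command file will be kept for debugging.
 * Returns false when the property was never set.
 */
public static boolean getKeepCommandFile(Configuration conf) {
    return conf.getBoolean("hama.pipes.command-file.keep", false);
}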