List of usage examples for org.apache.hadoop.conf.Configuration#setBoolean

public void setBoolean(String name, boolean value)

Sets the value of the name property to a boolean.
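Before the project examples below, a minimal sketch of the call in isolation; the property name used here is an arbitrary placeholder, not a key defined by Hadoop:

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean flag under an illustrative, made-up property name.
        conf.setBoolean("example.feature.enabled", true);

        // Read it back; the second argument is the default returned when the
        // property is absent or cannot be parsed as a boolean.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}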
From source file:com.twitter.hraven.etl.JobFileProcessor.java
License:Apache License
/**
 * @param conf
 *          to use to create and run the job
 * @param scan
 *          to be used to scan the raw table.
 * @param totalJobCount
 *          the total number of jobs that need to be run in this batch. Used
 *          in job name.
 * @return The job to be submitted to the cluster.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private Job getProcessingJob(Configuration conf, Scan scan, int totalJobCount) throws IOException {

    Configuration confClone = new Configuration(conf);

    // Turn off speculative execution.
    // Note: must be BEFORE the job construction with the new mapreduce API.
    confClone.setBoolean("mapred.map.tasks.speculative.execution", false);

    // Set up job
    Job job = new Job(confClone, getJobName(totalJobCount));

    // This is a map-only class, skip reduce step
    job.setNumReduceTasks(0);
    job.setJarByClass(JobFileProcessor.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);

    TableMapReduceUtil.initTableMapperJob(Constants.HISTORY_RAW_TABLE, scan, JobFileTableMapper.class,
            JobFileTableMapper.getOutputKeyClass(), JobFileTableMapper.getOutputValueClass(), job);

    return job;
}
From source file:com.twitter.hraven.etl.JobFileRawLoader.java
License:Apache License
/**
 * @param myHBaseConf
 *          used to contact HBase and to run jobs against. Should be an HBase
 *          configuration.
 * @param cluster
 *          for which to process records.
 * @param processFileSubstring
 *          return rows where the process file path contains this string. If
 *          <code>null</code> or empty string, then no filtering is applied.
 * @param forceReprocess
 *          whether all jobs for which a file is loaded needs to be
 *          reprocessed.
 * @return whether all job files for all processRecords were properly
 *         processed.
 * @throws IOException
 * @throws ClassNotFoundException
 *           when problems occur setting up the job.
 * @throws InterruptedException
 */
private boolean processRecordsFromHBase(Configuration myHBaseConf, String cluster, String processFileSubstring,
        boolean forceReprocess) throws IOException, InterruptedException, ClassNotFoundException {

    int failures = 0;

    ProcessRecordService processRecordService = new ProcessRecordService(myHBaseConf);

    // Grab all records.
    List<ProcessRecord> processRecords = processRecordService.getProcessRecords(cluster, PREPROCESSED,
            Integer.MAX_VALUE, processFileSubstring);

    try {
        LOG.info("ProcessRecords for " + cluster + ": " + processRecords.size());

        // Bind all MR jobs together with one runID.
        long now = System.currentTimeMillis();
        myHBaseConf.setLong(Constants.MR_RUN_CONF_KEY, now);

        myHBaseConf.setBoolean(Constants.FORCE_REPROCESS_CONF_KEY, forceReprocess);

        // Iterate over 0 based list in reverse order
        for (int j = processRecords.size() - 1; j >= 0; j--) {
            ProcessRecord processRecord = processRecords.get(j);

            LOG.info("Processing " + processRecord);

            boolean success = runRawLoaderJob(myHBaseConf, processRecord.getProcessFile(),
                    processRecords.size());

            // Bail out on first failure.
            if (success) {
                processRecordService.setProcessState(processRecord, ProcessState.LOADED);
            } else {
                failures++;
            }
        }
    } finally {
        processRecordService.close();
    }

    return (failures == 0);
}
From source file:com.twitter.hraven.etl.JobFileRawLoader.java
License:Apache License
/**
 * @param conf
 *          to use to create and run the job. Should be an HBase
 *          configuration.
 * @param input
 *          path to the processFile
 * @param totalJobCount
 *          the total number of jobs that need to be run in this batch. Used
 *          in job name.
 * @return whether all job confs were loaded properly.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private boolean runRawLoaderJob(Configuration myHBaseConf, String input, int totalJobCount)
        throws IOException, InterruptedException, ClassNotFoundException {
    boolean success;

    // Turn off speculative execution.
    // Note: must be BEFORE the job construction with the new mapreduce API.
    myHBaseConf.setBoolean("mapred.map.tasks.speculative.execution", false);

    // Set up job
    Job job = new Job(myHBaseConf, getJobName(totalJobCount));
    job.setJarByClass(JobFileRawLoader.class);

    Path inputPath = new Path(input);

    if (hdfs.exists(inputPath)) {

        // Set input
        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.setInputPaths(job, inputPath);

        job.setMapperClass(JobFileRawLoaderMapper.class);

        // Set the output format to push data into HBase.
        job.setOutputFormatClass(TableOutputFormat.class);
        TableMapReduceUtil.initTableReducerJob(Constants.HISTORY_RAW_TABLE, null, job);

        job.setOutputKeyClass(JobFileRawLoaderMapper.getOutputKeyClass());
        job.setOutputValueClass(JobFileRawLoaderMapper.getOutputValueClass());

        // This is a map-only class, skip reduce step
        job.setNumReduceTasks(0);

        // Run the job
        success = job.waitForCompletion(true);

        if (success) {
            success = hdfs.delete(inputPath, false);
        }

    } else {
        System.err.println("Unable to find processFile: " + inputPath);
        success = false;
    }
    return success;
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
public boolean testTask(TaskType taskType, String confParamName, long durationMin, final int MAX_RUN,
        float progress, boolean enforce, boolean dryRun, TIPStatus status, boolean wellBahaved, boolean killed)
        throws Exception {
    setTaskAttemptXML(durationMin * MIN, progress);

    TaskReport taskReport = mock(TaskReport.class);
    when(taskReport.getCurrentStatus()).thenReturn(status);
    Collection<TaskAttemptID> attempts = new ArrayList<TaskAttemptID>();
    attempts.add(taskAttemptId);
    when(taskReport.getRunningTaskAttemptIds()).thenReturn(attempts);
    when(taskReport.getTaskID()).thenReturn(org.apache.hadoop.mapred.TaskID.downgrade(taskId));
    when(taskReport.getProgress()).thenReturn(progress);

    vConf.setBoolean(HadoopJobMonitorConfiguration.DRY_RUN, dryRun);
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(confParamName, MAX_RUN);
    remoteAppConf.setBoolean(HadoopJobMonitorConfiguration.enforced(confParamName), enforce);
    when(taskReport.getStartTime()).thenReturn(now - durationMin * MIN);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    appStatusChecker.loadClientService();

    boolean res = appStatusChecker.checkTask(taskType, taskReport, now);

    if (wellBahaved)
        assertEquals("Well-bahved task does not pass the check", wellBahaved, res);
    else
        assertEquals("Not Well-bahved task passes the check", wellBahaved, res);

    if (killed) {
        killCounter++;
        verify(clientService, times(killCounter)).killTask(any(TaskAttemptID.class), Mockito.anyBoolean());
    } else
        verify(clientService, times(killCounter)).killTask(any(TaskAttemptID.class), Mockito.anyBoolean());

    return res;
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
@Test
public void testLongJobDryRun() throws IOException, ConfigurationAccessException, YarnException {
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN, 10);
    remoteAppConf.setBoolean(
            HadoopJobMonitorConfiguration.enforced(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN), true);
    when(appReport.getStartTime()).thenReturn(now - 15 * MIN);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    boolean res = appStatusChecker.checkApp();
    Assert.assertFalse("does not fail job duration check even though enforce is set", res);
    verify(rm, times(0)).killApplication(appId);
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
@Test
public void testLongJob() throws IOException, ConfigurationAccessException, YarnException {
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN, 10);
    remoteAppConf.setBoolean(
            HadoopJobMonitorConfiguration.enforced(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN), true);
    when(appReport.getStartTime()).thenReturn(now - 15 * MIN);
    vConf.setBoolean(HadoopJobMonitorConfiguration.DRY_RUN, false);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    boolean res = appStatusChecker.checkApp();
    Assert.assertFalse("does not fail job duration check even though enforce is set", res);
    verify(rm, times(1)).killApplication(appId);
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
@Test
public void testShortJob() throws IOException, ConfigurationAccessException {
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN, 10);
    remoteAppConf.setBoolean(
            HadoopJobMonitorConfiguration.enforced(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN), true);
    when(appReport.getStartTime()).thenReturn(now - 5 * MIN);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    boolean res = appStatusChecker.checkApp();
    Assert.assertTrue("fails job duration check even though the job is not too long", res);
}
From source file:com.uber.hoodie.common.minicluster.HdfsTestService.java
License:Apache License
/**
 * Configure the DFS Cluster before launching it.
 *
 * @param config The already created Hadoop configuration we'll further configure for HDFS
 * @param localDFSLocation The location on the local filesystem where cluster data is stored
 * @param bindIP An IP address we want to force the datanode and namenode to bind to.
 * @return The updated Configuration object.
 */
private static Configuration configureDFSCluster(Configuration config, String localDFSLocation, String bindIP,
        int namenodeRpcPort, int namenodeHttpPort, int datanodePort, int datanodeIpcPort, int datanodeHttpPort) {

    logger.info("HDFS force binding to ip: " + bindIP);
    config.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, "hdfs://" + bindIP + ":" + namenodeRpcPort);
    config.set(DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY, bindIP + ":" + datanodePort);
    config.set(DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY, bindIP + ":" + datanodeIpcPort);
    config.set(DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY, bindIP + ":" + datanodeHttpPort);
    // When a datanode registers with the namenode, the Namenode do a hostname
    // check of the datanode which will fail on OpenShift due to reverse DNS
    // issues with the internal IP addresses. This config disables that check,
    // and will allow a datanode to connect regardless.
    config.setBoolean("dfs.namenode.datanode.registration.ip-hostname-check", false);
    config.set("hdfs.minidfs.basedir", localDFSLocation);
    // allow current user to impersonate others
    String user = System.getProperty("user.name");
    config.set("hadoop.proxyuser." + user + ".groups", "*");
    config.set("hadoop.proxyuser." + user + ".hosts", "*");
    return config;
}
From source file:com.vertica.hadoop.VerticaConfiguration.java
License:Apache License
/**
 * Sets the Vertica database connection information in the {@link Configuration}
 *
 * @param conf
 *          the configuration
 * @param hostnames
 *          one or more hosts in the Vertica cluster
 * @param database
 *          the name of the Vertica database
 * @param username
 *          Vertica database username
 * @param password
 *          Vertica database password
 * @param port
 *          Vertica database port
 */
public static void configureVertica(Configuration conf, String[] hostnames, String database, String port,
        String username, String password) {
    conf.setBoolean(MAP_SPECULATIVE_EXEC, false);
    conf.setBoolean(REDUCE_SPECULATIVE_EXEC, false);
    conf.setStrings(HOSTNAMES_PROP, hostnames);
    conf.set(DATABASE_PROP, database);
    conf.set(USERNAME_PROP, username);
    conf.set(PASSWORD_PROP, password);
    conf.set(PORT_PROP, port);
}
From source file:com.vf.flume.sink.hdfs.BucketWriter.java
License:Apache License
/**
 * open() is called by append()
 * @throws IOException
 * @throws InterruptedException
 */
private void open() throws IOException, InterruptedException {
    if ((filePath == null) || (writer == null)) {
        throw new IOException("Invalid file settings");
    }

    final Configuration config = new Configuration();
    // disable FileSystem JVM shutdown hook
    config.setBoolean("fs.automatic.close", false);

    // Hadoop is not thread safe when doing certain RPC operations,
    // including getFileSystem(), when running under Kerberos.
    // open() must be called by one thread at a time in the JVM.
    // NOTE: tried synchronizing on the underlying Kerberos principal previously
    // which caused deadlocks. See FLUME-1231.
    synchronized (staticLock) {
        checkAndThrowInterruptedException();

        try {
            long counter = fileExtensionCounter.incrementAndGet();

            // String fullFileName = fileName + "." + counter;
            String fullFileName = fileName;

            if (fileSuffix != null && fileSuffix.length() > 0) {
                fullFileName += fileSuffix;
            } else if (codeC != null) {
                fullFileName += codeC.getDefaultExtension();
            }

            bucketPath = filePath + "/" + inUsePrefix + fullFileName + inUseSuffix;
            targetPath = filePath + "/" + fullFileName;

            LOG.info("Creating " + bucketPath);
            callWithTimeout(new CallRunner<Void>() {
                @Override
                public Void call() throws Exception {
                    if (codeC == null) {
                        // Need to get reference to FS using above config before underlying
                        // writer does in order to avoid shutdown hook &
                        // IllegalStateExceptions
                        if (!mockFsInjected) {
                            fileSystem = new Path(bucketPath).getFileSystem(config);
                        }
                        writer.open(bucketPath);
                    } else {
                        // need to get reference to FS before writer does to
                        // avoid shutdown hook
                        if (!mockFsInjected) {
                            fileSystem = new Path(bucketPath).getFileSystem(config);
                        }
                        writer.open(bucketPath, codeC, compType);
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            sinkCounter.incrementConnectionFailedCount();
            if (ex instanceof IOException) {
                throw (IOException) ex;
            } else {
                throw Throwables.propagate(ex);
            }
        }
    }
    isClosedMethod = getRefIsClosed();
    sinkCounter.incrementConnectionCreatedCount();
    resetCounters();

    // if time-based rolling is enabled, schedule the roll
    if (rollInterval > 0) {
        Callable<Void> action = new Callable<Void>() {
            public Void call() throws Exception {
                LOG.debug("Rolling file ({}): Roll scheduled after {} sec elapsed.", bucketPath, rollInterval);
                try {
                    // Roll the file and remove reference from sfWriters map.
                    close(true);
                } catch (Throwable t) {
                    LOG.error("Unexpected error", t);
                }
                return null;
            }
        };
        timedRollFuture = timedRollerPool.schedule(action, rollInterval, TimeUnit.SECONDS);
    }

    isOpen = true;
}