List of usage examples for org.apache.hadoop.conf.Configuration#setBoolean

public void setBoolean(String name, boolean value)

Sets the value of the name property to a boolean.
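Before the project examples below, a minimal sketch of the call in isolation; the property name used here is an arbitrary placeholder, not a key defined by Hadoop:

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean flag under an illustrative, made-up property name.
        conf.setBoolean("example.feature.enabled", true);

        // Read it back; the second argument is the default returned when the
        // property is absent or cannot be parsed as a boolean.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}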
From source file:com.twitter.hraven.etl.JobFileProcessor.java
License:Apache License
/**
 * @param conf
 *          to use to create and run the job
 * @param scan
 *          to be used to scan the raw table.
 * @param totalJobCount
 *          the total number of jobs that need to be run in this batch. Used
 *          in job name.
 * @return The job to be submitted to the cluster.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private Job getProcessingJob(Configuration conf, Scan scan, int totalJobCount) throws IOException {

    Configuration confClone = new Configuration(conf);

    // Turn off speculative execution.
    // Note: must be BEFORE the job construction with the new mapreduce API.
    confClone.setBoolean("mapred.map.tasks.speculative.execution", false);

    // Set up job
    Job job = new Job(confClone, getJobName(totalJobCount));

    // This is a map-only class, skip reduce step
    job.setNumReduceTasks(0);
    job.setJarByClass(JobFileProcessor.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);

    TableMapReduceUtil.initTableMapperJob(Constants.HISTORY_RAW_TABLE, scan, JobFileTableMapper.class,
            JobFileTableMapper.getOutputKeyClass(), JobFileTableMapper.getOutputValueClass(), job);

    return job;
}
From source file:com.twitter.hraven.etl.JobFileRawLoader.java
License:Apache License
/**
 * @param myHBaseConf
 *          used to contact HBase and to run jobs against. Should be an HBase
 *          configuration.
 * @param cluster
 *          for which to process records.
 * @param processFileSubstring
 *          return rows where the process file path contains this string. If
 *          <code>null</code> or empty string, then no filtering is applied.
 * @param forceReprocess
 *          whether all jobs for which a file is loaded needs to be
 *          reprocessed.
 * @return whether all job files for all processRecords were properly
 *         processed.
 * @throws IOException
 * @throws ClassNotFoundException
 *           when problems occur setting up the job.
 * @throws InterruptedException
 */
private boolean processRecordsFromHBase(Configuration myHBaseConf, String cluster, String processFileSubstring,
        boolean forceReprocess) throws IOException, InterruptedException, ClassNotFoundException {

    int failures = 0;

    ProcessRecordService processRecordService = new ProcessRecordService(myHBaseConf);

    // Grab all records.
    List<ProcessRecord> processRecords = processRecordService.getProcessRecords(cluster, PREPROCESSED,
            Integer.MAX_VALUE, processFileSubstring);

    try {
        LOG.info("ProcessRecords for " + cluster + ": " + processRecords.size());

        // Bind all MR jobs together with one runID.
        long now = System.currentTimeMillis();
        myHBaseConf.setLong(Constants.MR_RUN_CONF_KEY, now);

        myHBaseConf.setBoolean(Constants.FORCE_REPROCESS_CONF_KEY, forceReprocess);

        // Iterate over 0 based list in reverse order
        for (int j = processRecords.size() - 1; j >= 0; j--) {
            ProcessRecord processRecord = processRecords.get(j);

            LOG.info("Processing " + processRecord);

            boolean success = runRawLoaderJob(myHBaseConf, processRecord.getProcessFile(),
                    processRecords.size());

            // Bail out on first failure.
            if (success) {
                processRecordService.setProcessState(processRecord, ProcessState.LOADED);
            } else {
                failures++;
            }
        }
    } finally {
        processRecordService.close();
    }

    return (failures == 0);
}
From source file:com.twitter.hraven.etl.JobFileRawLoader.java
License:Apache License
/**
 * @param conf
 *          to use to create and run the job. Should be an HBase
 *          configuration.
 * @param input
 *          path to the processFile
 * @param totalJobCount
 *          the total number of jobs that need to be run in this batch. Used
 *          in job name.
 * @return whether all job confs were loaded properly.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private boolean runRawLoaderJob(Configuration myHBaseConf, String input, int totalJobCount)
        throws IOException, InterruptedException, ClassNotFoundException {
    boolean success;

    // Turn off speculative execution.
    // Note: must be BEFORE the job construction with the new mapreduce API.
    myHBaseConf.setBoolean("mapred.map.tasks.speculative.execution", false);

    // Set up job
    Job job = new Job(myHBaseConf, getJobName(totalJobCount));
    job.setJarByClass(JobFileRawLoader.class);

    Path inputPath = new Path(input);

    if (hdfs.exists(inputPath)) {

        // Set input
        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.setInputPaths(job, inputPath);

        job.setMapperClass(JobFileRawLoaderMapper.class);

        // Set the output format to push data into HBase.
        job.setOutputFormatClass(TableOutputFormat.class);
        TableMapReduceUtil.initTableReducerJob(Constants.HISTORY_RAW_TABLE, null, job);

        job.setOutputKeyClass(JobFileRawLoaderMapper.getOutputKeyClass());
        job.setOutputValueClass(JobFileRawLoaderMapper.getOutputValueClass());

        // This is a map-only class, skip reduce step
        job.setNumReduceTasks(0);

        // Run the job
        success = job.waitForCompletion(true);

        if (success) {
            success = hdfs.delete(inputPath, false);
        }

    } else {
        System.err.println("Unable to find processFile: " + inputPath);
        success = false;
    }
    return success;
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
public boolean testTask(TaskType taskType, String confParamName, long durationMin, final int MAX_RUN,
        float progress, boolean enforce, boolean dryRun, TIPStatus status, boolean wellBahaved, boolean killed)
        throws Exception {
    setTaskAttemptXML(durationMin * MIN, progress);

    TaskReport taskReport = mock(TaskReport.class);
    when(taskReport.getCurrentStatus()).thenReturn(status);
    Collection<TaskAttemptID> attempts = new ArrayList<TaskAttemptID>();
    attempts.add(taskAttemptId);
    when(taskReport.getRunningTaskAttemptIds()).thenReturn(attempts);
    when(taskReport.getTaskID()).thenReturn(org.apache.hadoop.mapred.TaskID.downgrade(taskId));
    when(taskReport.getProgress()).thenReturn(progress);

    vConf.setBoolean(HadoopJobMonitorConfiguration.DRY_RUN, dryRun);
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(confParamName, MAX_RUN);
    remoteAppConf.setBoolean(HadoopJobMonitorConfiguration.enforced(confParamName), enforce);
    when(taskReport.getStartTime()).thenReturn(now - durationMin * MIN);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    appStatusChecker.loadClientService();

    boolean res = appStatusChecker.checkTask(taskType, taskReport, now);

    if (wellBahaved)
        assertEquals("Well-bahved task does not pass the check", wellBahaved, res);
    else
        assertEquals("Not Well-bahved task passes the check", wellBahaved, res);

    if (killed) {
        killCounter++;
        verify(clientService, times(killCounter)).killTask(any(TaskAttemptID.class), Mockito.anyBoolean());
    } else
        verify(clientService, times(killCounter)).killTask(any(TaskAttemptID.class), Mockito.anyBoolean());

    return res;
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
@Test
public void testLongJobDryRun() throws IOException, ConfigurationAccessException, YarnException {
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN, 10);
    remoteAppConf.setBoolean(
            HadoopJobMonitorConfiguration.enforced(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN), true);
    when(appReport.getStartTime()).thenReturn(now - 15 * MIN);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    boolean res = appStatusChecker.checkApp();
    Assert.assertFalse("does not fail job duration check even though enforce is set", res);
    verify(rm, times(0)).killApplication(appId);
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
@Test
public void testLongJob() throws IOException, ConfigurationAccessException, YarnException {
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN, 10);
    remoteAppConf.setBoolean(
            HadoopJobMonitorConfiguration.enforced(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN), true);
    when(appReport.getStartTime()).thenReturn(now - 15 * MIN);
    vConf.setBoolean(HadoopJobMonitorConfiguration.DRY_RUN, false);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    boolean res = appStatusChecker.checkApp();
    Assert.assertFalse("does not fail job duration check even though enforce is set", res);
    verify(rm, times(1)).killApplication(appId);
}
From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java
License:Apache License
@Test
public void testShortJob() throws IOException, ConfigurationAccessException {
    Configuration remoteAppConf = new Configuration();
    remoteAppConf.setInt(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN, 10);
    remoteAppConf.setBoolean(
            HadoopJobMonitorConfiguration.enforced(HadoopJobMonitorConfiguration.JOB_MAX_LEN_MIN), true);
    when(appReport.getStartTime()).thenReturn(now - 5 * MIN);
    AppConfiguraiton appConf = new AppConfiguraiton(remoteAppConf, vConf);
    AppConfCache.getInstance().put(appId, appConf);
    appStatusChecker.init();
    boolean res = appStatusChecker.checkApp();
    Assert.assertTrue("fails job duration check even though the job is not too long", res);
}
From source file:com.uber.hoodie.common.minicluster.HdfsTestService.java
License:Apache License
/**
 * Configure the DFS Cluster before launching it.
 *
 * @param config The already created Hadoop configuration we'll further configure for HDFS
 * @param localDFSLocation The location on the local filesystem where cluster data is stored
 * @param bindIP An IP address we want to force the datanode and namenode to bind to.
 * @return The updated Configuration object.
 */
private static Configuration configureDFSCluster(Configuration config, String localDFSLocation, String bindIP,
        int namenodeRpcPort, int namenodeHttpPort, int datanodePort, int datanodeIpcPort, int datanodeHttpPort) {

    logger.info("HDFS force binding to ip: " + bindIP);
    config.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, "hdfs://" + bindIP + ":" + namenodeRpcPort);
    config.set(DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY, bindIP + ":" + datanodePort);
    config.set(DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY, bindIP + ":" + datanodeIpcPort);
    config.set(DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY, bindIP + ":" + datanodeHttpPort);
    // When a datanode registers with the namenode, the Namenode do a hostname
    // check of the datanode which will fail on OpenShift due to reverse DNS
    // issues with the internal IP addresses. This config disables that check,
    // and will allow a datanode to connect regardless.
    config.setBoolean("dfs.namenode.datanode.registration.ip-hostname-check", false);
    config.set("hdfs.minidfs.basedir", localDFSLocation);
    // allow current user to impersonate others
    String user = System.getProperty("user.name");
    config.set("hadoop.proxyuser." + user + ".groups", "*");
    config.set("hadoop.proxyuser." + user + ".hosts", "*");
    return config;
}
From source file:com.vertica.hadoop.VerticaConfiguration.java
License:Apache License
/**
 * Sets the Vertica database connection information in the {@link Configuration}
 *
 * @param conf
 *          the configuration
 * @param hostnames
 *          one or more hosts in the Vertica cluster
 * @param database
 *          the name of the Vertica database
 * @param username
 *          Vertica database username
 * @param password
 *          Vertica database password
 * @param port
 *          Vertica database port
 */
public static void configureVertica(Configuration conf, String[] hostnames, String database, String port,
        String username, String password) {
    conf.setBoolean(MAP_SPECULATIVE_EXEC, false);
    conf.setBoolean(REDUCE_SPECULATIVE_EXEC, false);
    conf.setStrings(HOSTNAMES_PROP, hostnames);
    conf.set(DATABASE_PROP, database);
    conf.set(USERNAME_PROP, username);
    conf.set(PASSWORD_PROP, password);
    conf.set(PORT_PROP, port);
}
From source file:com.vf.flume.sink.hdfs.BucketWriter.java
License:Apache License
/**
 * open() is called by append()
 * @throws IOException
 * @throws InterruptedException
 */
private void open() throws IOException, InterruptedException {
    if ((filePath == null) || (writer == null)) {
        throw new IOException("Invalid file settings");
    }

    final Configuration config = new Configuration();
    // disable FileSystem JVM shutdown hook
    config.setBoolean("fs.automatic.close", false);

    // Hadoop is not thread safe when doing certain RPC operations,
    // including getFileSystem(), when running under Kerberos.
    // open() must be called by one thread at a time in the JVM.
    // NOTE: tried synchronizing on the underlying Kerberos principal previously
    // which caused deadlocks. See FLUME-1231.
    synchronized (staticLock) {
        checkAndThrowInterruptedException();

        try {
            long counter = fileExtensionCounter.incrementAndGet();

            // String fullFileName = fileName + "." + counter;
            String fullFileName = fileName;

            if (fileSuffix != null && fileSuffix.length() > 0) {
                fullFileName += fileSuffix;
            } else if (codeC != null) {
                fullFileName += codeC.getDefaultExtension();
            }

            bucketPath = filePath + "/" + inUsePrefix + fullFileName + inUseSuffix;
            targetPath = filePath + "/" + fullFileName;

            LOG.info("Creating " + bucketPath);
            callWithTimeout(new CallRunner<Void>() {
                @Override
                public Void call() throws Exception {
                    if (codeC == null) {
                        // Need to get reference to FS using above config before underlying
                        // writer does in order to avoid shutdown hook &
                        // IllegalStateExceptions
                        if (!mockFsInjected) {
                            fileSystem = new Path(bucketPath).getFileSystem(config);
                        }
                        writer.open(bucketPath);
                    } else {
                        // need to get reference to FS before writer does to
                        // avoid shutdown hook
                        if (!mockFsInjected) {
                            fileSystem = new Path(bucketPath).getFileSystem(config);
                        }
                        writer.open(bucketPath, codeC, compType);
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            sinkCounter.incrementConnectionFailedCount();
            if (ex instanceof IOException) {
                throw (IOException) ex;
            } else {
                throw Throwables.propagate(ex);
            }
        }
    }
    isClosedMethod = getRefIsClosed();
    sinkCounter.incrementConnectionCreatedCount();
    resetCounters();

    // if time-based rolling is enabled, schedule the roll
    if (rollInterval > 0) {
        Callable<Void> action = new Callable<Void>() {
            public Void call() throws Exception {
                LOG.debug("Rolling file ({}): Roll scheduled after {} sec elapsed.", bucketPath, rollInterval);
                try {
                    // Roll the file and remove reference from sfWriters map.
                    close(true);
                } catch (Throwable t) {
                    LOG.error("Unexpected error", t);
                }
                return null;
            }
        };
        timedRollFuture = timedRollerPool.schedule(action, rollInterval, TimeUnit.SECONDS);
    }

    isOpen = true;
}