Example usage for org.apache.hadoop.conf Configuration setBoolean

List of usage examples for org.apache.hadoop.conf Configuration setBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.setBoolean(String, boolean).

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
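
A minimal, hedged sketch of the call (the property name "my.feature.enabled" is an illustrative key, not one defined by Hadoop): setBoolean stores the boolean as the string "true" or "false", and getBoolean reads it back with a caller-supplied default.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean flag; Configuration keeps it internally as the string "true"/"false".
        conf.setBoolean("my.feature.enabled", true);

        // Read it back, supplying a default used when the key is absent.
        boolean enabled = conf.getBoolean("my.feature.enabled", false);
        System.out.println("my.feature.enabled = " + enabled);
    }
}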

Usage

From source file:nl.gridline.zieook.runners.ZieOokRunnerTool.java

License:Apache License

/**
 * @param inputPath input path
 * @param outputPath output path
 * @param inputFormat input format
 * @param mapper mapper class
 * @param mapperKey mapper key
 * @param mapperValue mapper value
 * @param reducer reducer class
 * @param reducerKey reducer key
 * @param reducerValue reducer value
 * @param outputFormat output format
 * @return a ready to run Job
 * @throws IOException
 */
@SuppressWarnings("rawtypes")
public Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    // This never really worked for me: job.setJarByClass... in any case, we already know the jar
    // if (reducer.equals(Reducer.class))
    // {
    // if (mapper.equals(Mapper.class))
    // {
    // throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    // }
    // job.setJarByClass(mapper);
    // }
    // else
    // {
    // job.setJarByClass(reducer);
    // }

    job.setInputFormatClass(inputFormat);
    if (inputPath != null) {
        jobConf.set("mapred.input.dir", inputPath.toString());
    }

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    final String name = getCustomJobName(job, mapper, reducer);
    job.setJobName(name);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    LOG.debug("job setup for: {}", name);

    return job;
}
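
A call to this helper might look like the following hedged sketch; MyMapper, MyReducer, and the paths are placeholders rather than classes from this codebase, and the SequenceFile formats are only one plausible choice.

// Hypothetical run() body inside the same tool class; MyMapper/MyReducer and the paths are placeholders.
public int run(String[] args) throws Exception {
    Job job = prepareJob(new Path("/input/data"), new Path("/output/data"),
            SequenceFileInputFormat.class,
            MyMapper.class, Text.class, LongWritable.class,
            MyReducer.class, Text.class, LongWritable.class,
            SequenceFileOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}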

From source file:org.aesop.runtime.producer.hbase.HBaseEventProducer.java

License:Apache License

/**
 * Interface method implementation. Starts up the SEP consumer
 * @see com.linkedin.databus2.producers.EventProducer#start(long)
 */
public void start(long sinceSCN) {
    this.sinceSCN.set(sinceSCN);
    LOGGER.info("Starting SEP subscription : " + this.getName());
    LOGGER.info("ZK connection details [host:port] = {} : {}", this.zkQuorum, this.zkPort);
    LOGGER.info("Using hostname to bind to : " + this.localHost);
    LOGGER.info("Using worker threads : " + this.workerThreads);
    LOGGER.info("Listening to WAL edits from : " + this.sinceSCN);
    try {
        Configuration conf = HBaseConfiguration.create();
        // enable replication to get WAL edits
        conf.setBoolean(HBASE_REPLICATION_CONFIG, true);

        ZooKeeperItf zk = ZkUtil.connect(this.zkQuorum, this.zkPort);
        SepModel sepModel = new SepModelImpl(zk, conf);

        final String subscriptionName = this.getName();

        if (!sepModel.hasSubscription(subscriptionName)) {
            sepModel.addSubscriptionSilent(subscriptionName);
        }
        this.sepConsumer = new SepConsumer(subscriptionName, this.sinceSCN.get(), new RelayAppender(),
                this.workerThreads, this.localHost, zk, conf);
        this.sepConsumer.start();
    } catch (Exception e) {
        LOGGER.error(
                "Error starting WAL edits consumer. Producer not started!. Error message : " + e.getMessage(),
                e);
    }
}

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.java

License:Apache License

/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p>
 * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
 * conversion to a string, and is not intended to be secure.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param principal
 *          a valid Accumulo user name
 * @param token
 *          the user's password
 * @since 1.6.0
 */
public static void setConnectorInfo(Class<?> implementingClass, Configuration conf, String principal,
        AuthenticationToken token) throws AccumuloSecurityException {
    if (isConnectorInfoSet(implementingClass, conf))
        throw new IllegalStateException(
                "Connector info for " + implementingClass.getSimpleName() + " can only be set once per job");

    checkArgument(principal != null, "principal is null");
    checkArgument(token != null, "token is null");
    conf.setBoolean(enumToConfKey(implementingClass, ConnectorInfo.IS_CONFIGURED), true);
    conf.set(enumToConfKey(implementingClass, ConnectorInfo.PRINCIPAL), principal);
    if (token instanceof DelegationTokenImpl) {
        // Avoid serializing the DelegationToken secret in the configuration -- the Job will do that work for us securely
        DelegationTokenImpl delToken = (DelegationTokenImpl) token;
        conf.set(enumToConfKey(implementingClass, ConnectorInfo.TOKEN), TokenSource.JOB.prefix()
                + token.getClass().getName() + ":" + delToken.getServiceName().toString());
    } else {
        conf.set(enumToConfKey(implementingClass, ConnectorInfo.TOKEN),
                TokenSource.INLINE.prefix() + token.getClass().getName() + ":"
                        + Base64.getEncoder().encodeToString(AuthenticationTokenSerializer.serialize(token)));
    }
}
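
In a MapReduce driver this is usually reached through the public format classes rather than by calling ConfiguratorBase directly; a hedged sketch against the 1.6-era API, with placeholder credentials:

import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class ConnectorInfoExample {
    public static void main(String[] args) throws Exception {
        // "mapreduce_user" and "secret" are placeholders, not real credentials.
        Job job = Job.getInstance(new Configuration());
        AccumuloInputFormat.setConnectorInfo(job, "mapreduce_user", new PasswordToken("secret"));
    }
}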

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.java

License:Apache License

/**
 * Sets the connector information needed to communicate with Accumulo in this job.
 *
 * <p>
 * Pulls a token file into the Distributed Cache that contains the authentication token in an attempt to be more secure than storing the password in the
 * Configuration. Token file created with "bin/accumulo create-token".
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param principal
 *          a valid Accumulo user name
 * @param tokenFile
 *          the path to the token file in DFS
 * @since 1.6.0
 */
public static void setConnectorInfo(Class<?> implementingClass, Configuration conf, String principal,
        String tokenFile) throws AccumuloSecurityException {
    if (isConnectorInfoSet(implementingClass, conf))
        throw new IllegalStateException(
                "Connector info for " + implementingClass.getSimpleName() + " can only be set once per job");

    checkArgument(principal != null, "principal is null");
    checkArgument(tokenFile != null, "tokenFile is null");

    try {
        DistributedCacheHelper.addCacheFile(new URI(tokenFile), conf);
    } catch (URISyntaxException e) {
        throw new IllegalStateException("Unable to add tokenFile \"" + tokenFile + "\" to distributed cache.");
    }

    conf.setBoolean(enumToConfKey(implementingClass, ConnectorInfo.IS_CONFIGURED), true);
    conf.set(enumToConfKey(implementingClass, ConnectorInfo.PRINCIPAL), principal);
    conf.set(enumToConfKey(implementingClass, ConnectorInfo.TOKEN), TokenSource.FILE.prefix() + tokenFile);
}

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator.java

License:Apache License

/**
 * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
 * Disabling this feature will cause exactly one Map task to be created for each specified range.
 *
 * <p>
 * By default, this feature is <b>enabled</b>.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param enableFeature
 *          the feature is enabled if true, disabled otherwise
 * @see #setRanges(Class, Configuration, Collection)
 * @since 1.6.0
 */
public static void setAutoAdjustRanges(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
    conf.setBoolean(enumToConfKey(implementingClass, Features.AUTO_ADJUST_RANGES), enableFeature);
}
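
The public InputFormatBase API exposes the same switch, so a job would typically pin one map task per range as in the hedged sketch below; the row range is illustrative and the 1.6-era AccumuloInputFormat and Range classes are assumed.

// Hedged sketch against the public 1.6-era API, which delegates to this configurator.
public static void configureRanges(Job job) {
    // With auto-adjustment disabled, exactly one map task is created per supplied range.
    AccumuloInputFormat.setAutoAdjustRanges(job, false);
    AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("a", "z")));
}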

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator.java

License:Apache License

/**
 * Controls the use of the {@link IsolatedScanner} in this job.
 *
 * <p>
 * By default, this feature is <b>disabled</b>.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param enableFeature
 *          the feature is enabled if true, disabled otherwise
 * @since 1.6.0
 */
public static void setScanIsolation(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
    conf.setBoolean(enumToConfKey(implementingClass, Features.SCAN_ISOLATION), enableFeature);
}

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator.java

License:Apache License

/**
 * Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
 * task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
 *
 * <p>
 * By default, this feature is <b>disabled</b>.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param enableFeature
 *          the feature is enabled if true, disabled otherwise
 * @since 1.6.0
 */
public static void setLocalIterators(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
    conf.setBoolean(enumToConfKey(implementingClass, Features.USE_LOCAL_ITERATORS), enableFeature);
}

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator.java

License:Apache License

/**
 * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
 * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
 * fail.
 *
 * <p>
 * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
 *
 * <p>
 * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
 * on the mapper's classpath.
 *
 * <p>
 * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
 * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
 * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
 *
 * <p>
 * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
 * speculative execution better. When reading an online table, speculative execution can put more load on an already slow tablet server.
 *
 * <p>
 * By default, this feature is <b>disabled</b>.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param enableFeature
 *          the feature is enabled if true, disabled otherwise
 * @since 1.6.0
 */
public static void setOfflineTableScan(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
    conf.setBoolean(enumToConfKey(implementingClass, Features.SCAN_OFFLINE), enableFeature);
}
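
A hedged sketch of the clone-and-take-offline workflow described above, against the 1.6-era TableOperations and InputFormatBase API; the connector, job, and table names are placeholders:

// Hedged sketch; "mytable" and "mytable_mrclone" are placeholder table names.
public static void useOfflineClone(Connector connector, Job job) throws Exception {
    connector.tableOperations().clone("mytable", "mytable_mrclone", true /* flush */,
            Collections.<String, String> emptyMap(), Collections.<String> emptySet());
    connector.tableOperations().offline("mytable_mrclone", true); // wait until fully offline
    AccumuloInputFormat.setInputTableName(job, "mytable_mrclone");
    AccumuloInputFormat.setOfflineTableScan(job, true);
}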

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator.java

License:Apache License

/**
 * Controls the use of the {@link BatchScanner} in this job. Using this feature will group ranges by their source tablet per InputSplit and use BatchScanner
 * to read them.
 *
 * <p>
 * By default, this feature is <b>disabled</b>.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param enableFeature
 *          the feature is enabled if true, disabled otherwise
 * @since 1.7.0
 */
public static void setBatchScan(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
    conf.setBoolean(enumToConfKey(implementingClass, Features.BATCH_SCANNER), enableFeature);
}

From source file:org.apache.accumulo.core.client.mapreduce.lib.impl.OutputConfigurator.java

License:Apache License

/**
 * Sets the directive to create new tables, as necessary. Table names can only be alpha-numeric and underscores.
 *
 * <p>
 * By default, this feature is <b>disabled</b>.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration object to configure
 * @param enableFeature
 *          the feature is enabled if true, disabled otherwise
 * @since 1.6.0
 */
public static void setCreateTables(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
    conf.setBoolean(enumToConfKey(implementingClass, Features.CAN_CREATE_TABLES), enableFeature);
}