List of usage examples for org.apache.hadoop.conf.Configuration.getInt
public int getInt(String name, int defaultValue)
Gets the value of the name property as an int; if the property is not set, the supplied defaultValue is returned.
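Before the project examples, here is a minimal self-contained sketch of the call. The property name example.client.retry.count and its default are invented purely for illustration and are not part of any real project.

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    // Hypothetical property name and default, used only for this sketch.
    private static final String RETRY_COUNT_KEY = "example.client.retry.count";
    private static final int RETRY_COUNT_DEFAULT = 3;

    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The key is not set anywhere, so the default is returned.
        int retries = conf.getInt(RETRY_COUNT_KEY, RETRY_COUNT_DEFAULT);
        System.out.println("retries = " + retries); // prints 3

        // Once the property is set, getInt parses and returns the stored value.
        conf.setInt(RETRY_COUNT_KEY, 5);
        System.out.println("retries = " + conf.getInt(RETRY_COUNT_KEY, RETRY_COUNT_DEFAULT)); // prints 5
    }
}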
From source file:com.cloudera.llama.am.impl.ThrottleLlamaAM.java
License:Apache License
public ThrottleLlamaAM(Configuration conf, String queue, SingleQueueLlamaAM llamaAM) {
    super(conf);
    this.queue = queue;
    int defaultMaxPlacedRes = conf.getInt(MAX_PLACED_RESERVATIONS_KEY, MAX_PLACED_RESERVATIONS_DEFAULT);
    int defaultMaxQueuedRes = conf.getInt(MAX_QUEUED_RESERVATIONS_KEY, MAX_QUEUED_RESERVATIONS_DEFAULT);
    maxPlacedReservations = conf.getInt(FastFormat.format(MAX_PLACED_RESERVATIONS_QUEUE_KEY, queue),
        defaultMaxPlacedRes);
    maxQueuedReservations = conf.getInt(FastFormat.format(MAX_QUEUED_RESERVATIONS_QUEUE_KEY, queue),
        defaultMaxQueuedRes);
    LOG.info("Throttling queue '{}' max placed '{}' max queued '{}'", queue, maxPlacedReservations,
        maxQueuedReservations);
    placedReservations = 0;
    queuedReservations = new LinkedHashMap<UUID, PlacedReservationImpl>();
    this.am = llamaAM;
    am.addListener(this);
    am.setCallback(this);
    thread = new Thread(this, "llama-am-throttle:" + queue);
    thread.setDaemon(true);
}
From source file:com.cloudera.recordservice.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            @Override
            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                        new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                        context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                        "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
        outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing partitions took " + (t3 - t2) + "ms");
}
From source file:com.cloudera.recordservice.mr.PlanUtil.java
License:Apache License
/**
 * Creates a builder for RecordService planner client from the configuration.
 */
public static Builder getBuilder(Configuration conf) {
    RecordServicePlannerClient.Builder builder = new RecordServicePlannerClient.Builder();
    int connectionTimeoutMs = conf.getInt(ConfVars.PLANNER_CONNECTION_TIMEOUT_MS_CONF.name, -1);
    int rpcTimeoutMs = conf.getInt(ConfVars.PLANNER_RPC_TIMEOUT_MS_CONF.name, -1);
    int maxAttempts = conf.getInt(ConfVars.PLANNER_RETRY_ATTEMPTS_CONF.name, -1);
    int sleepDurationMs = conf.getInt(ConfVars.PLANNER_RETRY_SLEEP_MS_CONF.name, -1);
    int maxTasks = conf.getInt(ConfVars.PLANNER_REQUEST_MAX_TASKS.name, -1);
    if (connectionTimeoutMs != -1) builder.setConnectionTimeoutMs(connectionTimeoutMs);
    if (rpcTimeoutMs != -1) builder.setRpcTimeoutMs(rpcTimeoutMs);
    if (maxAttempts != -1) builder.setMaxAttempts(maxAttempts);
    if (sleepDurationMs != -1) builder.setSleepDurationMs(sleepDurationMs);
    if (maxTasks != -1) builder.setMaxTasks(maxTasks);
    return builder;
}
From source file:com.cloudera.recordservice.mr.WorkerUtil.java
License:Apache License
/**
 * Creates a builder for RecordService worker client from the configuration and
 * the delegation token.
 * @param jobConf the hadoop configuration
 * @param delegationToken the delegation token that the worker client should use to
 *   talk to the RS worker process.
 * @throws IOException
 */
public static Builder getBuilder(Configuration jobConf, DelegationToken delegationToken) {
    // Try to get the delegation token from the credentials. If it is there, use it.
    RecordServiceWorkerClient.Builder builder = new RecordServiceWorkerClient.Builder();
    int fetchSize = jobConf.getInt(ConfVars.FETCH_SIZE_CONF.name, DEFAULT_FETCH_SIZE);
    long memLimit = jobConf.getLong(ConfVars.MEM_LIMIT_CONF.name, -1);
    long limit = jobConf.getLong(ConfVars.RECORDS_LIMIT_CONF.name, -1);
    int maxAttempts = jobConf.getInt(ConfVars.WORKER_RETRY_ATTEMPTS_CONF.name, -1);
    int taskSleepMs = jobConf.getInt(ConfVars.WORKER_RETRY_SLEEP_MS_CONF.name, -1);
    int connectionTimeoutMs = jobConf.getInt(ConfVars.WORKER_CONNECTION_TIMEOUT_MS_CONF.name, -1);
    int rpcTimeoutMs = jobConf.getInt(ConfVars.WORKER_RPC_TIMEOUT_MS_CONF.name, -1);
    boolean enableLogging = jobConf.getBoolean(ConfVars.WORKER_ENABLE_SERVER_LOGGING_CONF.name, false);
    if (fetchSize != -1) builder.setFetchSize(fetchSize);
    if (memLimit != -1) builder.setMemLimit(memLimit);
    if (limit != -1) builder.setLimit(limit);
    if (maxAttempts != -1) builder.setMaxAttempts(maxAttempts);
    if (taskSleepMs != -1) builder.setSleepDurationMs(taskSleepMs);
    if (connectionTimeoutMs != -1) builder.setConnectionTimeoutMs(connectionTimeoutMs);
    if (rpcTimeoutMs != -1) builder.setRpcTimeoutMs(rpcTimeoutMs);
    if (enableLogging) builder.setLoggingLevel(LOG);
    if (delegationToken != null) builder.setDelegationToken(delegationToken);
    return builder;
}
From source file:com.cloudera.recordservice.mr.ZooKeeperUtil.java
License:Apache License
/**
 * Returns a list of network addresses for the RecordService planners currently
 * available as maintained by ZooKeeper.
 * @param conf The input client job configuration
 * @return A list of <code>NetworkAddress</code>es for all the planners available
 */
public static List<NetworkAddress> getPlanners(Configuration conf) throws IOException {
    String connectionString = conf.get(ConfVars.ZOOKEEPER_CONNECTION_STRING_CONF.name);
    if (connectionString == null || connectionString.trim().isEmpty()) {
        throw new IllegalArgumentException("Zookeeper connect string has to be specified through "
            + ConfVars.ZOOKEEPER_CONNECTION_STRING_CONF.name);
    }
    LOGGER.info("Connecting to zookeeper at: " + connectionString);
    int connectionTimeout = conf.getInt(ConfVars.ZOOKEEPER_CONNECT_TIMEOUTMILLIS_CONF.name,
        CuratorFrameworkFactory.builder().getConnectionTimeoutMs());
    LOGGER.info("Zookeeper connection timeout: " + connectionTimeout);
    String rootNode = conf.get(ConfVars.ZOOKEEPER_ZNODE_CONF.name,
        RecordServiceConfig.ZOOKEEPER_ZNODE_DEFAULT);
    LOGGER.info("Zookeeper root: " + rootNode);
    CuratorFramework cf = CuratorFrameworkFactory.builder().connectString(connectionString)
        .connectionTimeoutMs(connectionTimeout).aclProvider(new ZooKeeperACLProvider())
        .retryPolicy(new ExponentialBackoffRetry(1000, 3)).build();
    cf.start();
    List<NetworkAddress> result = new ArrayList<NetworkAddress>();
    try {
        for (String path : cf.getChildren().forPath(rootNode + "/planners")) {
            NetworkAddress addr = parsePath(path);
            if (addr != null) result.add(parsePath(path));
        }
    } catch (Exception e) {
        cf.close();
        throw new IOException("Could not obtain planner membership" + " from " + connectionString
            + ". Error message: " + e.getMessage(), e);
    }
    cf.close();
    return result;
}
From source file:com.cloudera.sa.hbase.to.hdfs.utils.NMapInputFormat.java
License:Apache License
public static int getNumMapTasks(Configuration conf) {
    return conf.getInt(NMAPS_KEY, 1);
}
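As an aside, a getter like this is typically paired with a setter that records the value via Configuration.setInt. The sketch below assumes an illustrative key string and a hypothetical setNumMapTasks helper rather than the actual constants of NMapInputFormat.

import org.apache.hadoop.conf.Configuration;

public final class NMapConfigSketch {
    // Illustrative key; the real NMAPS_KEY constant is defined in NMapInputFormat.
    private static final String NMAPS_KEY = "nmapinputformat.num.maps";

    // Hypothetical companion setter: stores the desired number of map tasks.
    public static void setNumMapTasks(Configuration conf, int numTasks) {
        conf.setInt(NMAPS_KEY, numTasks);
    }

    // Mirrors the getter above: falls back to 1 when the key is unset.
    public static int getNumMapTasks(Configuration conf) {
        return conf.getInt(NMAPS_KEY, 1);
    }
}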
From source file:com.cloudera.science.quince.LoadVariantsTool.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JCommander jc = new JCommander(this);
    try {
        jc.parse(args);
    } catch (ParameterException e) {
        jc.usage();
        return 1;
    }
    if (paths == null || paths.size() != 2) {
        jc.usage();
        return 1;
    }
    String inputPath = paths.get(0);
    String outputPath = paths.get(1);
    Configuration conf = getConf();
    // Copy records to avoid problem with Parquet string statistics not being correct.
    // This can be removed from parquet 1.8.0
    // (see https://issues.apache.org/jira/browse/PARQUET-251).
    conf.setBoolean(DatasetKeyOutputFormat.KITE_COPY_RECORDS, true);
    Path path = new Path(inputPath);
    if (path.getName().endsWith(".vcf")) {
        int size = 500000;
        byte[] bytes = new byte[size];
        InputStream inputStream = path.getFileSystem(conf).open(path);
        inputStream.read(bytes, 0, size);
        conf.set(VariantContextToVariantFn.VARIANT_HEADER, Base64.encodeBase64String(bytes));
    }
    Pipeline pipeline = new MRPipeline(getClass(), conf);
    PCollection<Variant> records = readVariants(path, conf, pipeline);
    PCollection<FlatVariant> flatRecords = records.parallelDo(new FlattenVariantFn(),
        Avros.specifics(FlatVariant.class));
    DatasetDescriptor desc = new DatasetDescriptor.Builder().schema(FlatVariant.getClassSchema())
        .partitionStrategy(buildPartitionStrategy(segmentSize)).format(Formats.PARQUET)
        .compressionType(CompressionType.Uncompressed).build();
    View<FlatVariant> dataset;
    if (Datasets.exists(outputPath)) {
        dataset = Datasets.load(outputPath, FlatVariant.class).getDataset().with("sample_group",
            sampleGroup);
    } else {
        dataset = Datasets.create(outputPath, desc, FlatVariant.class).getDataset().with("sample_group",
            sampleGroup);
    }
    int numReducers = conf.getInt("mapreduce.job.reduces", 1);
    System.out.println("Num reducers: " + numReducers);
    final Schema sortKeySchema = SchemaBuilder.record("sortKey").fields().requiredString("sampleId")
        .endRecord();
    PCollection<FlatVariant> partitioned = CrunchDatasets.partitionAndSort(flatRecords, dataset,
        new FlatVariantRecordMapFn(sortKeySchema), sortKeySchema, numReducers, 1);
    try {
        Target.WriteMode writeMode = overwrite ? Target.WriteMode.OVERWRITE : Target.WriteMode.DEFAULT;
        pipeline.write(partitioned, CrunchDatasets.asTarget(dataset), writeMode);
    } catch (CrunchRuntimeException e) {
        LOG.error("Crunch runtime error", e);
        return 1;
    }
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
}
From source file:com.cloudera.spark.bulkload.TotalOrderPartitioner.java
License:Apache License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts))
            ? FileSystem.getLocal(conf) // assume in DistributedCache
            : partFile.getFileSystem(conf);
        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                // Now that blocks of identical splitless trie nodes are
                // represented reentrantly, and we develop a leaf for any trie
                // node with only one split point, the only reason for a depth
                // limit is to refute stack overflow or bloat in the pathological
                // case where the split points are long and mostly look like bytes
                // iii...iixii...iii . Therefore, we make the default depth
                // limit large but not huge.
                conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
From source file:com.cloudera.sqoop.mapreduce.AutoProgressMapper.java
License:Apache License
/**
 * Set configuration parameters for the auto-progress thread.
 */
private void configureAutoProgress(Configuration job) {
    this.maxProgressPeriod = job.getInt(MAX_PROGRESS_PERIOD_KEY, DEFAULT_MAX_PROGRESS);
    this.sleepInterval = job.getInt(SLEEP_INTERVAL_KEY, DEFAULT_SLEEP_INTERVAL);
    this.reportInterval = job.getInt(REPORT_INTERVAL_KEY, DEFAULT_REPORT_INTERVAL);
    if (this.reportInterval < 1) {
        LOG.warn("Invalid " + REPORT_INTERVAL_KEY + "; setting to " + DEFAULT_REPORT_INTERVAL);
        this.reportInterval = DEFAULT_REPORT_INTERVAL;
    }
    if (this.sleepInterval > this.reportInterval || this.sleepInterval < 1) {
        LOG.warn("Invalid " + SLEEP_INTERVAL_KEY + "; setting to " + DEFAULT_SLEEP_INTERVAL);
        this.sleepInterval = DEFAULT_SLEEP_INTERVAL;
    }
    if (this.maxProgressPeriod < 0) {
        LOG.warn("Invalid " + MAX_PROGRESS_PERIOD_KEY + "; setting to " + DEFAULT_MAX_PROGRESS);
        this.maxProgressPeriod = DEFAULT_MAX_PROGRESS;
    }
}
From source file:com.cloudera.sqoop.shims.Apache22HadoopShim.java
License:Apache License
@Override
public int getConfNumMaps(Configuration conf) {
    return conf.getInt(JobContext.NUM_MAPS, 1);
}