List of usage examples for org.apache.hadoop.conf.Configuration.getInt
public int getInt(String name, int defaultValue)
Gets the value of the name property as an int; if the property is not set, the supplied defaultValue is returned.
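Before the project examples, here is a minimal self-contained sketch of the call. The property name example.client.retry.count and its default are invented purely for illustration and are not part of any real project.

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    // Hypothetical property name and default, used only for this sketch.
    private static final String RETRY_COUNT_KEY = "example.client.retry.count";
    private static final int RETRY_COUNT_DEFAULT = 3;

    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The key is not set anywhere, so the default is returned.
        int retries = conf.getInt(RETRY_COUNT_KEY, RETRY_COUNT_DEFAULT);
        System.out.println("retries = " + retries); // prints 3

        // Once the property is set, getInt parses and returns the stored value.
        conf.setInt(RETRY_COUNT_KEY, 5);
        System.out.println("retries = " + conf.getInt(RETRY_COUNT_KEY, RETRY_COUNT_DEFAULT)); // prints 5
    }
}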
From source file:com.cloudera.llama.am.impl.ThrottleLlamaAM.java
License:Apache License
public ThrottleLlamaAM(Configuration conf, String queue, SingleQueueLlamaAM llamaAM) {
    super(conf);
    this.queue = queue;
    int defaultMaxPlacedRes = conf.getInt(MAX_PLACED_RESERVATIONS_KEY, MAX_PLACED_RESERVATIONS_DEFAULT);
    int defaultMaxQueuedRes = conf.getInt(MAX_QUEUED_RESERVATIONS_KEY, MAX_QUEUED_RESERVATIONS_DEFAULT);
    maxPlacedReservations = conf.getInt(FastFormat.format(MAX_PLACED_RESERVATIONS_QUEUE_KEY, queue),
        defaultMaxPlacedRes);
    maxQueuedReservations = conf.getInt(FastFormat.format(MAX_QUEUED_RESERVATIONS_QUEUE_KEY, queue),
        defaultMaxQueuedRes);
    LOG.info("Throttling queue '{}' max placed '{}' max queued '{}'", queue, maxPlacedReservations,
        maxQueuedReservations);
    placedReservations = 0;
    queuedReservations = new LinkedHashMap<UUID, PlacedReservationImpl>();
    this.am = llamaAM;
    am.addListener(this);
    am.setCallback(this);
    thread = new Thread(this, "llama-am-throttle:" + queue);
    thread.setDaemon(true);
}
From source file:com.cloudera.recordservice.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            @Override
            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                        new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                        context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                        "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
        outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing partitions took " + (t3 - t2) + "ms");
}
From source file:com.cloudera.recordservice.mr.PlanUtil.java
License:Apache License
/**
 * Creates a builder for RecordService planner client from the configuration.
 */
public static Builder getBuilder(Configuration conf) {
    RecordServicePlannerClient.Builder builder = new RecordServicePlannerClient.Builder();
    int connectionTimeoutMs = conf.getInt(ConfVars.PLANNER_CONNECTION_TIMEOUT_MS_CONF.name, -1);
    int rpcTimeoutMs = conf.getInt(ConfVars.PLANNER_RPC_TIMEOUT_MS_CONF.name, -1);
    int maxAttempts = conf.getInt(ConfVars.PLANNER_RETRY_ATTEMPTS_CONF.name, -1);
    int sleepDurationMs = conf.getInt(ConfVars.PLANNER_RETRY_SLEEP_MS_CONF.name, -1);
    int maxTasks = conf.getInt(ConfVars.PLANNER_REQUEST_MAX_TASKS.name, -1);
    if (connectionTimeoutMs != -1) builder.setConnectionTimeoutMs(connectionTimeoutMs);
    if (rpcTimeoutMs != -1) builder.setRpcTimeoutMs(rpcTimeoutMs);
    if (maxAttempts != -1) builder.setMaxAttempts(maxAttempts);
    if (sleepDurationMs != -1) builder.setSleepDurationMs(sleepDurationMs);
    if (maxTasks != -1) builder.setMaxTasks(maxTasks);
    return builder;
}
From source file:com.cloudera.recordservice.mr.WorkerUtil.java
License:Apache License
/**
 * Creates a builder for RecordService worker client from the configuration and
 * the delegation token.
 * @param jobConf the hadoop configuration
 * @param delegationToken the delegation token that the worker client should use to
 *   talk to the RS worker process.
 * @throws IOException
 */
public static Builder getBuilder(Configuration jobConf, DelegationToken delegationToken) {
    // Try to get the delegation token from the credentials. If it is there, use it.
    RecordServiceWorkerClient.Builder builder = new RecordServiceWorkerClient.Builder();
    int fetchSize = jobConf.getInt(ConfVars.FETCH_SIZE_CONF.name, DEFAULT_FETCH_SIZE);
    long memLimit = jobConf.getLong(ConfVars.MEM_LIMIT_CONF.name, -1);
    long limit = jobConf.getLong(ConfVars.RECORDS_LIMIT_CONF.name, -1);
    int maxAttempts = jobConf.getInt(ConfVars.WORKER_RETRY_ATTEMPTS_CONF.name, -1);
    int taskSleepMs = jobConf.getInt(ConfVars.WORKER_RETRY_SLEEP_MS_CONF.name, -1);
    int connectionTimeoutMs = jobConf.getInt(ConfVars.WORKER_CONNECTION_TIMEOUT_MS_CONF.name, -1);
    int rpcTimeoutMs = jobConf.getInt(ConfVars.WORKER_RPC_TIMEOUT_MS_CONF.name, -1);
    boolean enableLogging = jobConf.getBoolean(ConfVars.WORKER_ENABLE_SERVER_LOGGING_CONF.name, false);
    if (fetchSize != -1) builder.setFetchSize(fetchSize);
    if (memLimit != -1) builder.setMemLimit(memLimit);
    if (limit != -1) builder.setLimit(limit);
    if (maxAttempts != -1) builder.setMaxAttempts(maxAttempts);
    if (taskSleepMs != -1) builder.setSleepDurationMs(taskSleepMs);
    if (connectionTimeoutMs != -1) builder.setConnectionTimeoutMs(connectionTimeoutMs);
    if (rpcTimeoutMs != -1) builder.setRpcTimeoutMs(rpcTimeoutMs);
    if (enableLogging) builder.setLoggingLevel(LOG);
    if (delegationToken != null) builder.setDelegationToken(delegationToken);
    return builder;
}
From source file:com.cloudera.recordservice.mr.ZooKeeperUtil.java
License:Apache License
/**
 * Returns a list of network addresses for the RecordService planners currently
 * available as maintained by ZooKeeper.
 * @param conf The input client job configuration
 * @return A list of <code>NetworkAddress</code>es for all the planners available
 */
public static List<NetworkAddress> getPlanners(Configuration conf) throws IOException {
    String connectionString = conf.get(ConfVars.ZOOKEEPER_CONNECTION_STRING_CONF.name);
    if (connectionString == null || connectionString.trim().isEmpty()) {
        throw new IllegalArgumentException("Zookeeper connect string has to be specified through "
            + ConfVars.ZOOKEEPER_CONNECTION_STRING_CONF.name);
    }
    LOGGER.info("Connecting to zookeeper at: " + connectionString);
    int connectionTimeout = conf.getInt(ConfVars.ZOOKEEPER_CONNECT_TIMEOUTMILLIS_CONF.name,
        CuratorFrameworkFactory.builder().getConnectionTimeoutMs());
    LOGGER.info("Zookeeper connection timeout: " + connectionTimeout);
    String rootNode = conf.get(ConfVars.ZOOKEEPER_ZNODE_CONF.name,
        RecordServiceConfig.ZOOKEEPER_ZNODE_DEFAULT);
    LOGGER.info("Zookeeper root: " + rootNode);
    CuratorFramework cf = CuratorFrameworkFactory.builder().connectString(connectionString)
        .connectionTimeoutMs(connectionTimeout).aclProvider(new ZooKeeperACLProvider())
        .retryPolicy(new ExponentialBackoffRetry(1000, 3)).build();
    cf.start();
    List<NetworkAddress> result = new ArrayList<NetworkAddress>();
    try {
        for (String path : cf.getChildren().forPath(rootNode + "/planners")) {
            NetworkAddress addr = parsePath(path);
            if (addr != null) result.add(parsePath(path));
        }
    } catch (Exception e) {
        cf.close();
        throw new IOException("Could not obtain planner membership" + " from " + connectionString
            + ". Error message: " + e.getMessage(), e);
    }
    cf.close();
    return result;
}
From source file:com.cloudera.sa.hbase.to.hdfs.utils.NMapInputFormat.java
License:Apache License
public static int getNumMapTasks(Configuration conf) {
    return conf.getInt(NMAPS_KEY, 1);
}
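As an aside, a getter like this is typically paired with a setter that records the value via Configuration.setInt. The sketch below assumes an illustrative key string and a hypothetical setNumMapTasks helper rather than the actual constants of NMapInputFormat.

import org.apache.hadoop.conf.Configuration;

public final class NMapConfigSketch {
    // Illustrative key; the real NMAPS_KEY constant is defined in NMapInputFormat.
    private static final String NMAPS_KEY = "nmapinputformat.num.maps";

    // Hypothetical companion setter: stores the desired number of map tasks.
    public static void setNumMapTasks(Configuration conf, int numTasks) {
        conf.setInt(NMAPS_KEY, numTasks);
    }

    // Mirrors the getter above: falls back to 1 when the key is unset.
    public static int getNumMapTasks(Configuration conf) {
        return conf.getInt(NMAPS_KEY, 1);
    }
}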
From source file:com.cloudera.science.quince.LoadVariantsTool.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    JCommander jc = new JCommander(this);
    try {
        jc.parse(args);
    } catch (ParameterException e) {
        jc.usage();
        return 1;
    }
    if (paths == null || paths.size() != 2) {
        jc.usage();
        return 1;
    }
    String inputPath = paths.get(0);
    String outputPath = paths.get(1);
    Configuration conf = getConf();
    // Copy records to avoid problem with Parquet string statistics not being correct.
    // This can be removed from parquet 1.8.0
    // (see https://issues.apache.org/jira/browse/PARQUET-251).
    conf.setBoolean(DatasetKeyOutputFormat.KITE_COPY_RECORDS, true);
    Path path = new Path(inputPath);
    if (path.getName().endsWith(".vcf")) {
        int size = 500000;
        byte[] bytes = new byte[size];
        InputStream inputStream = path.getFileSystem(conf).open(path);
        inputStream.read(bytes, 0, size);
        conf.set(VariantContextToVariantFn.VARIANT_HEADER, Base64.encodeBase64String(bytes));
    }
    Pipeline pipeline = new MRPipeline(getClass(), conf);
    PCollection<Variant> records = readVariants(path, conf, pipeline);
    PCollection<FlatVariant> flatRecords = records.parallelDo(new FlattenVariantFn(),
        Avros.specifics(FlatVariant.class));
    DatasetDescriptor desc = new DatasetDescriptor.Builder().schema(FlatVariant.getClassSchema())
        .partitionStrategy(buildPartitionStrategy(segmentSize)).format(Formats.PARQUET)
        .compressionType(CompressionType.Uncompressed).build();
    View<FlatVariant> dataset;
    if (Datasets.exists(outputPath)) {
        dataset = Datasets.load(outputPath, FlatVariant.class).getDataset().with("sample_group",
            sampleGroup);
    } else {
        dataset = Datasets.create(outputPath, desc, FlatVariant.class).getDataset().with("sample_group",
            sampleGroup);
    }
    int numReducers = conf.getInt("mapreduce.job.reduces", 1);
    System.out.println("Num reducers: " + numReducers);
    final Schema sortKeySchema = SchemaBuilder.record("sortKey").fields().requiredString("sampleId")
        .endRecord();
    PCollection<FlatVariant> partitioned = CrunchDatasets.partitionAndSort(flatRecords, dataset,
        new FlatVariantRecordMapFn(sortKeySchema), sortKeySchema, numReducers, 1);
    try {
        Target.WriteMode writeMode = overwrite ? Target.WriteMode.OVERWRITE : Target.WriteMode.DEFAULT;
        pipeline.write(partitioned, CrunchDatasets.asTarget(dataset), writeMode);
    } catch (CrunchRuntimeException e) {
        LOG.error("Crunch runtime error", e);
        return 1;
    }
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
}
From source file:com.cloudera.spark.bulkload.TotalOrderPartitioner.java
License:Apache License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts))
            ? FileSystem.getLocal(conf) // assume in DistributedCache
            : partFile.getFileSystem(conf);
        Job job = new Job(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                // Now that blocks of identical splitless trie nodes are
                // represented reentrantly, and we develop a leaf for any trie
                // node with only one split point, the only reason for a depth
                // limit is to refute stack overflow or bloat in the pathological
                // case where the split points are long and mostly look like bytes
                // iii...iixii...iii . Therefore, we make the default depth
                // limit large but not huge.
                conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
From source file:com.cloudera.sqoop.mapreduce.AutoProgressMapper.java
License:Apache License
/**
 * Set configuration parameters for the auto-progress thread.
 */
private void configureAutoProgress(Configuration job) {
    this.maxProgressPeriod = job.getInt(MAX_PROGRESS_PERIOD_KEY, DEFAULT_MAX_PROGRESS);
    this.sleepInterval = job.getInt(SLEEP_INTERVAL_KEY, DEFAULT_SLEEP_INTERVAL);
    this.reportInterval = job.getInt(REPORT_INTERVAL_KEY, DEFAULT_REPORT_INTERVAL);
    if (this.reportInterval < 1) {
        LOG.warn("Invalid " + REPORT_INTERVAL_KEY + "; setting to " + DEFAULT_REPORT_INTERVAL);
        this.reportInterval = DEFAULT_REPORT_INTERVAL;
    }
    if (this.sleepInterval > this.reportInterval || this.sleepInterval < 1) {
        LOG.warn("Invalid " + SLEEP_INTERVAL_KEY + "; setting to " + DEFAULT_SLEEP_INTERVAL);
        this.sleepInterval = DEFAULT_SLEEP_INTERVAL;
    }
    if (this.maxProgressPeriod < 0) {
        LOG.warn("Invalid " + MAX_PROGRESS_PERIOD_KEY + "; setting to " + DEFAULT_MAX_PROGRESS);
        this.maxProgressPeriod = DEFAULT_MAX_PROGRESS;
    }
}
From source file:com.cloudera.sqoop.shims.Apache22HadoopShim.java
License:Apache License
@Override
public int getConfNumMaps(Configuration conf) {
    return conf.getInt(JobContext.NUM_MAPS, 1);
}