List of usage examples for org.apache.hadoop.conf.Configuration.getInt

public int getInt(String name, int defaultValue)

Gets the value of the name property as an int. If no such property exists, the provided defaultValue is returned; if the property is set but its value is not a valid int, a NumberFormatException is thrown.
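Before the project examples, a minimal self-contained sketch of the lookup-with-default behavior; the property names here are illustrative, not taken from any of the projects below:

import org.apache.hadoop.conf.Configuration;

public class GetIntDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("my.app.parallelism", "8");

        // Property present and parseable: returns 8.
        int parallelism = conf.getInt("my.app.parallelism", 4);

        // Property absent: the default (4) is returned.
        int retries = conf.getInt("my.app.retries", 4);

        System.out.println(parallelism + " " + retries); // prints "8 4"
    }
}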
From source file:com.kasabi.labs.freebase.mr.Utils.java
License:Apache License
public static void setReducers(Job job, Configuration configuration, Logger log) {
    boolean runLocal = configuration.getBoolean(Constants.OPTION_RUN_LOCAL,
            Constants.OPTION_RUN_LOCAL_DEFAULT);
    int num_reducers = configuration.getInt(Constants.OPTION_NUM_REDUCERS,
            Constants.OPTION_NUM_REDUCERS_DEFAULT);
    if (runLocal) {
        if (log != null)
            log.debug("Setting number of reducers to {}", 1);
        job.setNumReduceTasks(1);
    } else {
        job.setNumReduceTasks(num_reducers);
        if (log != null)
            log.debug("Setting number of reducers to {}", num_reducers);
    }
}
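A hedged usage sketch for the helper above. The option keys are the example's own Constants; the values are illustrative, and the logger is assumed to be SLF4J (consistent with the {} placeholders in the debug calls):

// Illustrative driver-side setup; values are assumptions.
Configuration conf = new Configuration();
conf.setBoolean(Constants.OPTION_RUN_LOCAL, false);  // use the configured reducer count
conf.setInt(Constants.OPTION_NUM_REDUCERS, 16);      // hypothetical reducer count
Job job = Job.getInstance(conf);
Utils.setReducers(job, conf, LoggerFactory.getLogger(Utils.class));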
From source file:com.koda.integ.hbase.blockcache.OffHeapBlockCache.java
License:Open Source License
/**
 * Instantiates a new off heap block cache.
 *
 * @param conf the conf
 */
public OffHeapBlockCache(Configuration conf) {
    this.blockSize = conf.getInt("hbase.offheapcache.minblocksize", HColumnDescriptor.DEFAULT_BLOCKSIZE);
    blockCacheMaxSize = conf.getLong(BLOCK_CACHE_MEMORY_SIZE, 0L);
    if (blockCacheMaxSize == 0L) {
        throw new RuntimeException("off heap block cache size is not defined");
    }
    nativeBufferSize = conf.getInt(BLOCK_CACHE_BUFFER_SIZE, DEFAULT_BLOCK_CACH_BUFFER_SIZE);
    extCacheMaxSize = conf.getLong(BLOCK_CACHE_EXT_STORAGE_MEMORY_SIZE, (long) (0.1 * blockCacheMaxSize));
    youngGenFactor = conf.getFloat(BLOCK_CACHE_YOUNG_GEN_FACTOR, DEFAULT_YOUNG_FACTOR);
    overflowExtEnabled = conf.getBoolean(BLOCK_CACHE_OVERFLOW_TO_EXT_STORAGE_ENABLED, false);
    isPersistent = conf.getBoolean(BLOCK_CACHE_PERSISTENT, false);
    if (isPersistent) {
        // Check if we have already set CacheableDeserializer.
        // We need to set the deserializer before starting the cache,
        // because we can have already cached blocks on cache start up
        // and the first get before put will fail.
        if (CacheableSerializer.getDeserializer() == null) {
            CacheableSerializer.setHFileDeserializer();
        } else {
            LOG.info("CacheableSerializer is already set.");
        }
    }
    isSnapshotsEnabled = conf.getBoolean(BLOCK_CACHE_SNAPSHOTS, false);
    snapshotsInterval = conf.getInt(BLOCK_CACHE_SNAPSHOT_INTERVAL, 600) * 1000;

    String[] dataRoots = getDataRoots(conf.get(BLOCK_CACHE_DATA_ROOTS));

    if (isPersistent && dataRoots == null) {
        dataRoots = getHDFSRoots(conf);
        if (dataRoots == null) {
            LOG.warn("Data roots are not defined. Set persistent mode to false.");
            isPersistent = false;
        }
    }
    adjustMaxMemory();

    /** Possible values: none, snappy, gzip, lz4, lz4hc */
    // TODO: LZ4 is not supported on all platforms
    // TODO: default compression is LZ4?
    CodecType codec = CodecType.LZ4;
    String value = conf.get(BLOCK_CACHE_COMPRESSION);
    if (value != null) {
        codec = CodecType.valueOf(value.toUpperCase());
    }

    try {
        CacheConfiguration cacheCfg = new CacheConfiguration();
        cacheCfg.setCacheName("block-cache");
        cacheCfg.setSerDeBufferSize(nativeBufferSize);
        cacheCfg.setMaxMemory(blockCacheMaxSize);
        cacheCfg.setCodecType(codec);
        String evictionPolicy = conf.get(BLOCK_CACHE_EVICTION, "LRU").toUpperCase();
        cacheCfg.setEvictionPolicy(evictionPolicy);
        // Set this only for LRU2Q
        cacheCfg.setLRU2QInsertPoint(youngGenFactor);
        setBucketNumber(cacheCfg);

        CacheManager manager = CacheManager.getInstance();

        if (overflowExtEnabled) {
            LOG.info("Overflow to external storage is enabled.");
            // External storage handle cache
            CacheConfiguration extStorageCfg = new CacheConfiguration();
            extStorageCfg.setCacheName("extStorageCache");
            extStorageCfg.setMaxMemory(extCacheMaxSize);
            extStorageCfg.setEvictionPolicy(EvictionPolicy.FIFO.toString());
            extStorageCfg.setSerDeBufferSize(4096); // small
            extStorageCfg.setPreevictionListSize(40);
            extStorageCfg.setKeyClassName(byte[].class.getName());
            extStorageCfg.setValueClassName(byte[].class.getName());
            // Calculate bucket number; 50 is an estimate of a record size.
            int buckets = (extCacheMaxSize / EXT_STORAGE_REF_SIZE) > Integer.MAX_VALUE
                    ? Integer.MAX_VALUE - 1
                    : (int) (extCacheMaxSize / EXT_STORAGE_REF_SIZE);
            extStorageCfg.setBucketNumber(buckets);
            if (isPersistent) {
                // TODO - this in-memory cache has the same data dirs as the major cache.
                RawFSConfiguration storeConfig = new RawFSConfiguration();
                storeConfig.setStoreName(extStorageCfg.getCacheName());
                storeConfig.setDiskStoreImplementation(RawFSStore.class);
                storeConfig.setDbDataStoreRoots(dataRoots);
                storeConfig.setPersistenceMode(PersistenceMode.ONDEMAND);
                storeConfig.setDbCompressionType(CodecType.LZ4);
                storeConfig.setDbSnapshotInterval(15);
                //storeConfig.setTotalWorkerThreads(Runtime.getRuntime().availableProcessors() / 2);
                //storeConfig.setTotalIOThreads(1);
                extStorageCfg.setDataStoreConfiguration(storeConfig);
            }
            // This will initiate the load of stored cache data
            // if persistence is enabled.
            extStorageCache = manager.getCache(extStorageCfg, null);
            // Initialize external storage
            storage = ExtStorageManager.getInstance().getStorage(conf, extStorageCache);
        } else {
            LOG.info("Overflow to external storage is disabled.");
            if (isPersistent) {
                RawFSConfiguration storeConfig = new RawFSConfiguration();
                storeConfig.setStoreName(cacheCfg.getCacheName());
                storeConfig.setDiskStoreImplementation(RawFSStore.class);
                storeConfig.setDbDataStoreRoots(dataRoots);
                storeConfig.setPersistenceMode(PersistenceMode.ONDEMAND);
                storeConfig.setDbSnapshotInterval(15);
                cacheCfg.setDataStoreConfiguration(storeConfig);
                // Load cache data
                offHeapCache = manager.getCache(cacheCfg, null);
            }
        }
        if (offHeapCache == null) {
            offHeapCache = manager.getCache(cacheCfg, null);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    boolean onHeapEnabled = conf.getBoolean(BLOCK_CACHE_ONHEAP_ENABLED, true);
    if (onHeapEnabled) {
        long onHeapCacheSize = calculateOnHeapCacheSize(conf);
        if (onHeapCacheSize > 0) {
            onHeapCache = new OnHeapBlockCache(onHeapCacheSize, blockSize, conf);
            LOG.info("Created fast on-heap cache. Size=" + onHeapCacheSize);
        } else {
            LOG.warn("Conflicting configuration options. On-heap cache is disabled.");
        }
    }

    this.stats = new CacheStats();
    this.onHeapStats = new CacheStats();
    this.offHeapStats = new CacheStats();
    this.extStats = new CacheStats();
    this.extRefStats = new CacheStats();

    EvictionListener listener = new EvictionListener() {
        @Override
        public void evicted(long ptr, Reason reason, long nanoTime) {
            stats.evict();
            stats.evicted();
        }
    };
    offHeapCache.setEvictionListener(listener);

    // Cacheable serializer registration
    CacheableSerializer serde = new CacheableSerializer();
    offHeapCache.getSerDe().registerSerializer(serde);

    // if (extStorageCache != null) {
    //     //StorageHandleSerializer serde2 = new StorageHandleSerializer();
    //     //SmallByteArraySerializer serde2 = new SmallByteArraySerializer();
    //     //extStorageCache.getSerDe().registerSerializer(serde2);
    // }

    // Start statistics thread
    statThread = new StatisticsThread(this);
    statThread.start();
}
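A hedged configuration sketch for the constructor above. The key constants are the class's own fields (assumed publicly accessible here, which the excerpt does not confirm), and every value shown is an illustrative assumption:

// Illustrative setup; sizes and codec are assumptions, not recommendations.
Configuration conf = HBaseConfiguration.create();
conf.setLong(OffHeapBlockCache.BLOCK_CACHE_MEMORY_SIZE, 4L * 1024 * 1024 * 1024); // required, else RuntimeException
conf.set(OffHeapBlockCache.BLOCK_CACHE_COMPRESSION, "lz4");    // parsed via CodecType.valueOf
conf.setBoolean(OffHeapBlockCache.BLOCK_CACHE_PERSISTENT, false);
OffHeapBlockCache cache = new OffHeapBlockCache(conf);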
From source file:com.koda.integ.hbase.blockcache.OffHeapBlockCacheOld.java
License:Open Source License
/**
 * Instantiates a new off heap block cache.
 *
 * @param conf the conf
 */
public OffHeapBlockCacheOld(Configuration conf) {
    this.blockSize = conf.getInt("hbase.offheapcache.minblocksize", HColumnDescriptor.DEFAULT_BLOCKSIZE);
    CacheConfiguration cacheCfg = ConfigHelper.getCacheConfiguration(conf);
    maxSize = cacheCfg.getMaxGlobalMemory();
    if (maxSize == 0) {
        // Use max memory
        maxSize = cacheCfg.getMaxMemory();
        LOG.warn("[OffHeapBlockCache] Global max memory is not specified, using max memory instead.");
    }
    if (maxSize == 0) {
        LOG.fatal(CacheConfiguration.MAX_GLOBAL_MEMORY + " is not specified.");
        throw new RuntimeException(
                "[OffHeapBlockCache]" + CacheConfiguration.MAX_GLOBAL_MEMORY + " is not specified.");
    }

    // TODO make sure sum == 1
    youngGenFactor = conf.getFloat(YOUNG_GEN_FACTOR, DEFAULT_YOUNG_FACTOR);
    tenGenFactor = conf.getFloat(TENURED_GEN_FACTOR, DEFAULT_PERM_FACTOR);
    permGenFactor = conf.getFloat(PERM_GEN_FACTOR, DEFAULT_PERM_FACTOR);
    extStorageFactor = conf.getFloat(EXT_STORAGE_FACTOR, DEFAULT_EXT_STORAGE_FACTOR);
    overflowExtEnabled = conf.getBoolean(OVERFLOW_TO_EXT_STORAGE_ENABLED, false);

    long youngSize = (long) (youngGenFactor * maxSize);
    long tenSize = (long) (tenGenFactor * maxSize);
    long permSize = (long) (permGenFactor * maxSize);
    long extStorageSize = (long) (extStorageFactor * maxSize);

    /** Possible values: none, snappy, gzip, lz4 */
    // TODO: LZ4 is not supported on all platforms
    CodecType youngGenCodec = CodecType.LZ4;
    CodecType tenGenCodec = CodecType.LZ4;
    CodecType permGenCodec = CodecType.LZ4;
    CodecType extStorageCodec = CodecType.LZ4;

    String value = conf.get(YOUNG_GEN_COMPRESSION);
    if (value != null) {
        youngGenCodec = CodecType.valueOf(value.toUpperCase());
    }
    value = conf.get(TENURED_GEN_COMPRESSION);
    if (value != null) {
        tenGenCodec = CodecType.valueOf(value.toUpperCase());
    }
    value = conf.get(PERM_GEN_COMPRESSION);
    if (value != null) {
        permGenCodec = CodecType.valueOf(value.toUpperCase());
    }
    value = conf.get(EXT_STORAGE_COMPRESSION);
    if (value != null) {
        extStorageCodec = CodecType.valueOf(value.toUpperCase());
    }

    try {
        // TODO - Verify we have deep enough copy
        CacheConfiguration youngCfg = cacheCfg.copy();
        youngCfg.setMaxMemory(youngSize);
        // Disable disk persistence for young gen
        // TODO - Do we really need disabling
        //youngCfg.setDataStoreConfiguration(null);
        // TODO - enable exceed over limit mode
        //youngCfg.setCompressionEnabled(youngGenCodec != CodecType.NONE);
        youngCfg.setCodecType(youngGenCodec);
        String name = youngCfg.getCacheName();
        youngCfg.setCacheName(name + "_young");
        setBucketNumber(youngCfg);

        CacheConfiguration tenCfg = cacheCfg.copy();
        tenCfg.setMaxMemory(tenSize);
        // TODO - enable exceed over limit mode
        //tenCfg.setCompressionEnabled(tenGenCodec != CodecType.NONE);
        tenCfg.setCodecType(tenGenCodec);
        name = tenCfg.getCacheName();
        tenCfg.setCacheName(name + "_tenured");
        setBucketNumber(tenCfg);

        CacheConfiguration permCfg = cacheCfg.copy();
        permCfg.setMaxMemory(permSize);
        // TODO - enable exceed over limit mode
        //permCfg.setCompressionEnabled(permGenCodec != CodecType.NONE);
        permCfg.setCodecType(permGenCodec);
        name = permCfg.getCacheName();
        permCfg.setCacheName(name + "_perm");
        setBucketNumber(permCfg);

        CacheManager manager = CacheManager.getInstance();
        // TODO add ProgressListener
        youngGenCache = manager.getCache(youngCfg, null);
        // TODO - do we need this?
        //youngGenCache.setEvictionAuto(false);
        tenGenCache = manager.getCache(tenCfg, null);
        // TODO - do we need this?
        //tenGenCache.setEvictionAuto(false);
        permGenCache = manager.getCache(permCfg, null);
        // TODO - do we need this?
        //permGenCache.setEvictionAuto(false);

        if (overflowExtEnabled) {
            LOG.info("Overflow to external storage is enabled.");
            // External storage handle cache
            CacheConfiguration extStorageCfg = cacheCfg.copy();
            extStorageCfg.setMaxMemory(extStorageSize);
            extStorageCfg.setCodecType(extStorageCodec);
            name = extStorageCfg.getCacheName();
            extStorageCfg.setCacheName(name + "_ext");
            // Calculate bucket number; 50 is an estimate of a record size.
            int buckets = (extStorageSize / 50) > Integer.MAX_VALUE
                    ? Integer.MAX_VALUE - 1
                    : (int) (extStorageSize / 50);
            extStorageCfg.setBucketNumber(buckets);
            extStorageCache = manager.getCache(extStorageCfg, null);
            // Initialize external storage
            storage = ExtStorageManager.getInstance().getStorage(conf, extStorageCache);
        } else {
            LOG.info("Overflow to external storage is disabled.");
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.stats = new CacheStats();

    EvictionListener listener = new EvictionListener() {
        @Override
        public void evicted(long ptr, Reason reason, long nanoTime) {
            stats.evict();
            stats.evicted();
        }
    };
    youngGenCache.setEvictionListener(listener);
    // TODO separate eviction listener
    tenGenCache.setEvictionListener(listener);
    permGenCache.setEvictionListener(listener);

    // Cacheable serializer registration
    CacheableSerializer serde = new CacheableSerializer();
    youngGenCache.getSerDe().registerSerializer(serde);
    tenGenCache.getSerDe().registerSerializer(serde);
    permGenCache.getSerDe().registerSerializer(serde);

    if (extStorageCache != null) {
        StorageHandleSerializer serde2 = new StorageHandleSerializer();
        extStorageCache.getSerDe().registerSerializer(serde2);
    }
}
From source file:com.koda.integ.hbase.util.ConfigHelper.java
License:Open Source License
/**
 * Gets the cache configuration.
 *
 * @param cfg the cfg
 * @return the cache configuration
 */
public static CacheConfiguration getCacheConfiguration(Configuration cfg) {
    CacheConfiguration ccfg = new CacheConfiguration();

    String value = cfg.get(CacheConfiguration.COMPRESSION, "none");
    // TODO not safe
    ccfg.setCodecType(CodecType.valueOf(value.toUpperCase()));

    ccfg.setCompressionThreshold(cfg.getInt(CacheConfiguration.COMPRESSION_THRESHOLD, 100));
    ccfg.setDefaultExpireTimeout(cfg.getInt(CacheConfiguration.DEFAULT_EXPIRE_TIMEOUT, 0));
    ccfg.setEvictOnExpireFirst(cfg.getBoolean(CacheConfiguration.EVICT_EXPIRED_FIRST, true));
    ccfg.setCandidateListSize(cfg.getInt(CacheConfiguration.EVICTION_LIST_SIZE, 30));
    ccfg.setEvictionPolicy(cfg.get(CacheConfiguration.EVICTION_POLICY, "lru"));
    ccfg.setHighWatermark(cfg.getFloat(CacheConfiguration.HIGH_WATERMARK, 0.98f));
    ccfg.setLowWatermark(cfg.getFloat(CacheConfiguration.LOW_WATERMARK, 0.95f));

    value = cfg.get(CacheConfiguration.KEY_CLASSNAME);
    if (value != null) {
        ccfg.setKeyClassName(value);
    }
    value = cfg.get(CacheConfiguration.VALUE_CLASSNAME);
    if (value != null) {
        ccfg.setValueClassName(value);
    }

    ccfg.setMaxConcurrentReaders(cfg.getInt(CacheConfiguration.MAX_CONCURRENT_READERS, 0));
    ccfg.setMaxQueryProcessors(cfg.getInt(CacheConfiguration.MAX_QUERY_PROCESSORS, 0));
    ccfg.setMaxEntries(cfg.getLong(CacheConfiguration.MAX_ENTRIES, 0));

    value = cfg.get(CacheConfiguration.MAX_GLOBAL_MEMORY);
    if (value != null) {
        ccfg.setMaxGlobalMemory(Long.parseLong(value));
    } else {
        LOG.info(" Max global memory is not specified.");
    }
    value = cfg.get(CacheConfiguration.MAX_MEMORY);
    if (value != null) {
        ccfg.setMaxMemory(Long.parseLong(value));
    } else {
        LOG.info(" Max memory is not specified.");
    }

    ccfg.setCacheName(cfg.get(CacheConfiguration.NAME, "default"));
    ccfg.setCacheNamespace(cfg.get(CacheConfiguration.NAMESPACE, "default"));
    ccfg.setSerDeBufferSize(cfg.getInt(CacheConfiguration.SERDE_BUFSIZE, 4 * 1024 * 1024));

    // TODO bucket number must be calculated
    ccfg.setBucketNumber(cfg.getInt(CacheConfiguration.TOTAL_BUCKETS, 1000000));

    // Done with common cache configurations
    value = cfg.get(DiskStoreConfiguration.PERSISTENCE, "none");
    if (value.equals("none")) {
        // We are done
        return ccfg;
    }

    DiskStoreConfiguration dcfg = loadDiskStoreCfg(cfg, value);
    ccfg.setDataStoreConfiguration(dcfg);
    return ccfg;
}
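A hedged usage sketch: populate the Hadoop Configuration with the CacheConfiguration keys read above, then build the cache configuration. The values are illustrative assumptions:

Configuration conf = new Configuration();
conf.set(CacheConfiguration.COMPRESSION, "lz4");            // parsed via CodecType.valueOf
conf.setInt(CacheConfiguration.COMPRESSION_THRESHOLD, 256); // illustrative threshold
conf.set(CacheConfiguration.MAX_MEMORY, "1073741824");      // 1 GB; read back via Long.parseLong
CacheConfiguration ccfg = ConfigHelper.getCacheConfiguration(conf);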
From source file:com.linkedin.cubert.block.CreateBlockOperator.java
License:Open Source License
@Override
public void setInput(Configuration conf, Map<String, Block> input, JsonNode json)
        throws IOException, InterruptedException {
    if (input.size() == 0)
        throw new IllegalArgumentException("No input block is provided");
    if (input.size() != 1)
        throw new IllegalArgumentException(
                "This operator operates on only one input block. (" + input.size() + " provided)");

    this.inputJson = json;

    Block block = input.values().iterator().next();
    if (block == null)
        throw new IllegalArgumentException(
                "The specified block for [" + input.keySet().iterator().next() + "] is null");

    blockgenType = BlockgenType.valueOf(JsonUtils.getText(json, "blockgenType").toUpperCase());
    if (json.has("blockgenValue")) {
        blockgenValue = json.get("blockgenValue").getLongValue();
    }
    partitionKeys = JsonUtils.asArray(json.get("partitionKeys"));
    // if (json.get("originalPartitionKeys") != null)
    //     originalPartitionKeys = JsonUtils.asArray(json.get("originalPartitionKeys"));
    if (json.has("pivotKeys"))
        sortKeys = JsonUtils.asArray(json, "pivotKeys");

    // If sort keys are a prefix of partition keys, or vice versa, there is
    // no need to sort the block.
    if (CommonUtils.isPrefix(partitionKeys, sortKeys) || CommonUtils.isPrefix(sortKeys, partitionKeys))
        sortKeys = null;

    inputBlock = new PivotedBlock(block, partitionKeys);

    if (blockgenType == BlockgenType.BY_INDEX) {
        if (PhaseContext.isMapper()) {
            throw new RuntimeException("Expecting Reduce Context while performing LOAD BLOCK");
        }
        nReducers = conf.getInt("mapred.reduce.tasks", -1);
        if (nReducers < 0)
            throw new RuntimeException("Unable to determine number of reducers.");
        reducerId = PhaseContext.getRedContext().getTaskAttemptID().getTaskID().getId();
        retrieveRelevantBlockIds(json);
    }
}
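One idiom worth calling out in the BY_INDEX branch: passing -1 as the getInt default turns the return value into a presence check, since a negative value can only mean the property was never set. A minimal sketch of the same idiom with an illustrative key:

// Sentinel default: -1 is not a valid batch size, so a negative return
// means the (illustrative) property was never set explicitly.
int batchSize = conf.getInt("my.app.batch.size", -1);
if (batchSize < 0) {
    throw new IllegalStateException("my.app.batch.size must be configured");
}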
From source file:com.linkedin.cubert.io.MultiMapperInputFormat.java
License:Open Source License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int numMultiMappers = conf.getInt(CubertStrings.NUM_MULTI_MAPPERS, 1);

    List<InputSplit> splits = new ArrayList<InputSplit>();

    for (int i = 0; i < numMultiMappers; i++) {
        String dirs = conf.get(CubertStrings.MAPRED_INPUT_DIR + i);
        conf.set("mapred.input.dir", dirs);

        List<InputSplit> mapperSplits = getDelegate(context.getConfiguration(), i).getSplits(context);
        for (InputSplit split : mapperSplits) {
            splits.add(new MultiMapperSplit((FileSplit) split, i));
        }
    }
    return splits;
}
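A hedged driver-side sketch of how this format expects its configuration to be laid out: one indexed input-dir key per logical mapper. The keys are the CubertStrings constants used above; the mapper count and paths are illustrative assumptions:

// Illustrative job setup; assumes two logical mappers.
Configuration conf = job.getConfiguration();
conf.setInt(CubertStrings.NUM_MULTI_MAPPERS, 2);
conf.set(CubertStrings.MAPRED_INPUT_DIR + 0, "/data/input/clicks");   // hypothetical path
conf.set(CubertStrings.MAPRED_INPUT_DIR + 1, "/data/input/profiles"); // hypothetical path
job.setInputFormatClass(MultiMapperInputFormat.class);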
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java
License:Apache License
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Take the max, as core-default.xml has io.file.buffer.size as 4K.
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
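The sync-interval line shows getInt composing layered defaults: prefer an explicit setting, fall back to a related system setting, and floor the result at a constant. A minimal sketch of the same pattern, with illustrative key names:

// Layered-default lookup: explicit setting wins, then a related system
// setting, then a hard floor. The first key is illustrative.
int floor = 16 * 1024;
int interval = conf.getInt("my.format.sync.interval",
        Math.max(conf.getInt("io.file.buffer.size", floor), floor));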
From source file:com.linkedin.drelephant.util.Utils.java
License:Apache License
/**
 * Get a non-negative int value from a Configuration.
 *
 * If the value is not set or not an integer, the provided default value is returned.
 * If the value is negative, 0 is returned.
 *
 * @param conf Configuration to be extracted
 * @param key property name
 * @param defaultValue default value
 * @return non-negative int value
 */
public static int getNonNegativeInt(Configuration conf, String key, int defaultValue) {
    try {
        int value = conf.getInt(key, defaultValue);
        if (value < 0) {
            value = 0;
            logger.warn("Configuration " + key + " is negative. Resetting it to 0");
        }
        return value;
    } catch (NumberFormatException e) {
        logger.error("Invalid configuration " + key + ". Value is " + conf.get(key)
                + ". Resetting it to default value: " + defaultValue);
        return defaultValue;
    }
}
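The try/catch here matters because Configuration.getInt only falls back to the default when the property is absent; if the property is set to a non-numeric string, getInt throws NumberFormatException. A short usage sketch with an illustrative key:

Configuration conf = new Configuration();
conf.set("dr.elephant.max.retries", "oops");  // malformed on purpose; key is illustrative
int retries = Utils.getNonNegativeInt(conf, "dr.elephant.max.retries", 3);
// A bare conf.getInt(...) would throw NumberFormatException here;
// the wrapper logs the bad value and returns the default, 3.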
From source file:com.linkedin.pinot.common.segment.fetcher.HdfsSegmentFetcher.java
License:Apache License
@Override
public void init(org.apache.commons.configuration.Configuration configs) {
    try {
        retryCount = configs.getInt(RETRY, retryCount);
        retryWaitMs = configs.getInt(RETRY_WAITIME_MS, retryWaitMs);
        Configuration hadoopConf = getConf(configs.getString(HADOOP_CONF_PATH));
        authenticate(hadoopConf, configs);
        hadoopFS = FileSystem.get(hadoopConf);
        LOGGER.info("Successfully initialized HDFS segment fetcher");
    } catch (Exception e) {
        LOGGER.error("Failed to initialize the HDFS segment fetcher", e);
    }
}
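Worth noting: the two retry lookups above call getInt on org.apache.commons.configuration.Configuration, which shares the name-plus-default shape with Hadoop's Configuration.getInt but is a different class from a different library; only hadoopConf in this example is a Hadoop Configuration. Also note the idiom of passing the field's current value as the default, so an unset property leaves the existing setting untouched.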
From source file:com.marklogic.contentpump.LocalJobRunner.java
License:Apache License
public LocalJobRunner(Job job, CommandLine cmdline, Command cmd) {
    this.job = job;
    this.cmd = cmd;

    threadCount = DEFAULT_THREAD_COUNT;
    if (cmdline.hasOption(THREAD_COUNT)) {
        threadCount = Integer.parseInt(cmdline.getOptionValue(THREAD_COUNT));
    }
    if (threadCount > 1) {
        pool = Executors.newFixedThreadPool(threadCount);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Thread pool size: " + threadCount);
        }
    }
    if (cmdline.hasOption(THREADS_PER_SPLIT)) {
        threadsPerSplit = Integer.parseInt(cmdline.getOptionValue(THREADS_PER_SPLIT));
    }

    Configuration conf = job.getConfiguration();
    minThreads = conf.getInt(CONF_MIN_THREADS, minThreads);

    jobComplete = new AtomicBoolean();
    startTime = System.currentTimeMillis();
}