List of usage examples for org.apache.hadoop.conf Configuration getLong
public long getLong(String name, long defaultValue)
Returns the value of the name property as a long. If no such property exists, the supplied defaultValue is returned; if the stored value cannot be parsed as a long, a NumberFormatException is thrown.
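Before the project-specific examples below, here is a minimal, self-contained sketch of the call pattern. The property key my.app.timeout.ms and its default of 30000 are made up for illustration and are not part of any real project configuration.

import org.apache.hadoop.conf.Configuration;

public class GetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The hypothetical key is not set anywhere, so the default (30000L) is returned.
        long timeoutMs = conf.getLong("my.app.timeout.ms", 30000L);
        System.out.println("timeout = " + timeoutMs);

        // Once a value is set, getLong parses the stored string as a long.
        conf.setLong("my.app.timeout.ms", 60000L);
        System.out.println("timeout = " + conf.getLong("my.app.timeout.ms", 30000L)); // 60000
    }
}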
From source file:com.firewallid.util.HTMLContent.java
public HTMLContent(Configuration firewallConf) {
    partitions = firewallConf.getInt(PARTITIONS, 48);
    retries = firewallConf.getInt(RETRIES, 10);
    timeout = firewallConf.getLong(TIMEOUT, 5);
}
From source file:com.fullcontact.sstable.index.SSTableIndexIndex.java
License:Apache License
/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate chunks
 * (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {
    final Configuration configuration = fileSystem.getConf();

    final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
            HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

    final Closer closer = Closer.create();

    final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
    final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

    boolean success = false;

    try {
        final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

        final TLongArrayList splitOffsets = new TLongArrayList();
        long currentStart = 0;
        long currentEnd = 0;
        final IndexOffsetScanner index = new IndexOffsetScanner(sstablePath, fileSystem);

        while (index.hasNext()) {
            // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
            // This should be good enough since it's only used for sorting splits by size in hadoop land.
            while (currentEnd - currentStart < splitSize && index.hasNext()) {
                currentEnd = index.next();
                splitOffsets.add(currentEnd);
            }

            // Record the split
            final long[] offsets = splitOffsets.toArray();
            os.writeLong(offsets[0]); // Start
            os.writeLong(offsets[offsets.length - 1]); // End

            // Clear the offsets
            splitOffsets.clear();

            if (index.hasNext()) {
                currentStart = index.next();
                currentEnd = currentStart;
                splitOffsets.add(currentStart);
            }
        }

        success = true;
    } finally {
        closer.close();

        if (!success) {
            fileSystem.delete(inProgressOutputPath, false);
        } else {
            fileSystem.rename(inProgressOutputPath, outputPath);
        }
    }
}
From source file:com.google.appengine.tools.mapreduce.RangeInputFormat.java
License:Apache License
private long getNonNegativeLong(Configuration conf, String key) throws IOException {
    long retVal = conf.getLong(key, -1L);
    if (retVal < 0) {
        throw new InvalidConfigurationException("Invalid or nonexistent value for " + key);
    }
    return retVal;
}
From source file:com.google.cloud.bigtable.hbase.BigtableOptionsFactory.java
License:Open Source License
public static BigtableOptions fromConfiguration(final Configuration configuration) throws IOException {
    BigtableOptions.Builder bigtableOptionsBuilder = new BigtableOptions.Builder();

    bigtableOptionsBuilder.setProjectId(getValue(configuration, PROJECT_ID_KEY, "Project ID"));
    bigtableOptionsBuilder.setZoneId(getValue(configuration, ZONE_KEY, "Zone"));
    bigtableOptionsBuilder.setClusterId(getValue(configuration, CLUSTER_KEY, "Cluster"));

    bigtableOptionsBuilder
            .setDataHost(getHost(configuration, BIGTABLE_HOST_KEY, BIGTABLE_DATA_HOST_DEFAULT, "API Data"));

    bigtableOptionsBuilder.setTableAdminHost(getHost(configuration, BIGTABLE_TABLE_ADMIN_HOST_KEY,
            BIGTABLE_TABLE_ADMIN_HOST_DEFAULT, "Table Admin"));

    bigtableOptionsBuilder.setClusterAdminHost(getHost(configuration, BIGTABLE_CLUSTER_ADMIN_HOST_KEY,
            BIGTABLE_CLUSTER_ADMIN_HOST_DEFAULT, "Cluster Admin"));

    int port = configuration.getInt(BIGTABLE_PORT_KEY, BIGTABLE_PORT_DEFAULT);
    bigtableOptionsBuilder.setPort(port);

    setChannelOptions(bigtableOptionsBuilder, configuration);

    int asyncMutatorCount = configuration.getInt(BIGTABLE_ASYNC_MUTATOR_COUNT_KEY,
            BIGTABLE_ASYNC_MUTATOR_COUNT_DEFAULT);
    bigtableOptionsBuilder.setAsyncMutatorWorkerCount(asyncMutatorCount);

    bigtableOptionsBuilder.setUseBulkApi(configuration.getBoolean(BIGTABLE_USE_BULK_API, true));
    bigtableOptionsBuilder.setBulkMaxRowKeyCount(configuration.getInt(BIGTABLE_BULK_MAX_ROW_KEY_COUNT,
            BigtableOptions.BIGTABLE_BULK_MAX_ROW_KEY_COUNT_DEFAULT));
    bigtableOptionsBuilder.setBulkMaxRequestSize(configuration.getLong(BIGTABLE_BULK_MAX_REQUEST_SIZE_BYTES,
            BigtableOptions.BIGTABLE_BULK_MAX_REQUEST_SIZE_BYTES_DEFAULT));

    bigtableOptionsBuilder
            .setUsePlaintextNegotiation(configuration.getBoolean(BIGTABLE_USE_PLAINTEXT_NEGOTIATION, false));

    return bigtableOptionsBuilder.build();
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/**
 * Configures GHFS using the supplied configuration.
 *
 * @param config Hadoop configuration object.
 */
private synchronized void configure(Configuration config) throws IOException {
    LOG.debug("GHFS.configure");
    LOG.debug("GHFS_ID = {}", GHFS_ID);

    if (gcsfs == null) {

        copyDeprecatedConfigurationOptions(config);

        Credential credential;
        try {
            credential = HadoopCredentialConfiguration.newBuilder().withConfiguration(config)
                    .withOverridePrefix(AUTHENTICATION_PREFIX).build()
                    .getCredential(CredentialFactory.GCS_SCOPES);
        } catch (GeneralSecurityException gse) {
            throw new IOException(gse);
        }

        GoogleCloudStorageFileSystemOptions.Builder optionsBuilder = createOptionsBuilderFromConfig(config);

        PathCodec pathCodec;
        String specifiedPathCodec = config.get(PATH_CODEC_KEY, PATH_CODEC_DEFAULT).toLowerCase();
        LOG.debug("{} = {}", PATH_CODEC_KEY, specifiedPathCodec);
        if (specifiedPathCodec.equals(PATH_CODEC_USE_LEGACY_ENCODING)) {
            pathCodec = GoogleCloudStorageFileSystem.LEGACY_PATH_CODEC;
        } else if (specifiedPathCodec.equals(PATH_CODEC_USE_URI_ENCODING)) {
            pathCodec = GoogleCloudStorageFileSystem.URI_ENCODED_PATH_CODEC;
        } else {
            pathCodec = GoogleCloudStorageFileSystem.LEGACY_PATH_CODEC;
            LOG.warn("Unknown path codec specified {}. Using default / legacy.", specifiedPathCodec);
        }
        optionsBuilder.setPathCodec(pathCodec);
        gcsfs = new GoogleCloudStorageFileSystem(credential, optionsBuilder.build());
    }

    bufferSizeOverride = config.getInt(BUFFERSIZE_KEY, BUFFERSIZE_DEFAULT);
    LOG.debug("{} = {}", BUFFERSIZE_KEY, bufferSizeOverride);

    defaultBlockSize = config.getLong(BLOCK_SIZE_KEY, BLOCK_SIZE_DEFAULT);
    LOG.debug("{} = {}", BLOCK_SIZE_KEY, defaultBlockSize);

    String systemBucketName = config.get(GCS_SYSTEM_BUCKET_KEY, null);
    LOG.debug("{} = {}", GCS_SYSTEM_BUCKET_KEY, systemBucketName);

    boolean createSystemBucket = config.getBoolean(GCS_CREATE_SYSTEM_BUCKET_KEY,
            GCS_CREATE_SYSTEM_BUCKET_DEFAULT);
    LOG.debug("{} = {}", GCS_CREATE_SYSTEM_BUCKET_KEY, createSystemBucket);

    reportedPermissions = new FsPermission(
            config.get(PERMISSIONS_TO_REPORT_KEY, PERMISSIONS_TO_REPORT_DEFAULT));
    LOG.debug("{} = {}", PERMISSIONS_TO_REPORT_KEY, reportedPermissions);

    configureBuckets(systemBucketName, createSystemBucket);

    // Set initial working directory to root so that any configured value gets resolved
    // against file system root.
    workingDirectory = getFileSystemRoot();

    Path newWorkingDirectory;
    String configWorkingDirectory = config.get(GCS_WORKING_DIRECTORY_KEY);
    if (Strings.isNullOrEmpty(configWorkingDirectory)) {
        newWorkingDirectory = getDefaultWorkingDirectory();
        LOG.warn("No working directory configured, using default: '{}'", newWorkingDirectory);
    } else {
        newWorkingDirectory = new Path(configWorkingDirectory);
    }

    // Use the public method to ensure proper behavior of normalizing and resolving the new
    // working directory relative to the initial filesystem-root directory.
    setWorkingDirectory(newWorkingDirectory);
    LOG.debug("{} = {}", GCS_WORKING_DIRECTORY_KEY, getWorkingDirectory());

    // Set this configuration as the default config for this instance.
    setConf(config);

    LOG.debug("GHFS.configure: done");
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
@VisibleForTesting
GoogleCloudStorageFileSystemOptions.Builder createOptionsBuilderFromConfig(Configuration config)
        throws IOException {
    GoogleCloudStorageFileSystemOptions.Builder optionsBuilder = GoogleCloudStorageFileSystemOptions
            .newBuilder();

    boolean enableMetadataCache = config.getBoolean(GCS_ENABLE_METADATA_CACHE_KEY,
            GCS_ENABLE_METADATA_CACHE_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_METADATA_CACHE_KEY, enableMetadataCache);
    optionsBuilder.setIsMetadataCacheEnabled(enableMetadataCache);

    boolean enableBucketDelete = config.getBoolean(GCE_BUCKET_DELETE_ENABLE_KEY,
            GCE_BUCKET_DELETE_ENABLE_DEFAULT);
    LOG.debug("{} = {}", GCE_BUCKET_DELETE_ENABLE_KEY, enableBucketDelete);
    optionsBuilder.setEnableBucketDelete(enableBucketDelete);

    DirectoryListCache.Type cacheType = DirectoryListCache.Type
            .valueOf(config.get(GCS_METADATA_CACHE_TYPE_KEY, GCS_METADATA_CACHE_TYPE_DEFAULT));
    LOG.debug("{} = {}", GCS_METADATA_CACHE_TYPE_KEY, cacheType);
    optionsBuilder.setCacheType(cacheType);

    String cacheBasePath = config.get(GCS_METADATA_CACHE_DIRECTORY_KEY, GCS_METADATA_CACHE_DIRECTORY_DEFAULT);
    LOG.debug("{} = {}", GCS_METADATA_CACHE_DIRECTORY_KEY, cacheBasePath);
    optionsBuilder.setCacheBasePath(cacheBasePath);

    long cacheMaxEntryAgeMillis = config.getLong(GCS_METADATA_CACHE_MAX_ENTRY_AGE_KEY,
            GCS_METADATA_CACHE_MAX_ENTRY_AGE_DEFAULT);
    LOG.debug("{} = {}", GCS_METADATA_CACHE_MAX_ENTRY_AGE_KEY, cacheMaxEntryAgeMillis);
    optionsBuilder.setCacheMaxEntryAgeMillis(cacheMaxEntryAgeMillis);

    long cacheMaxInfoAgeMillis = config.getLong(GCS_METADATA_CACHE_MAX_INFO_AGE_KEY,
            GCS_METADATA_CACHE_MAX_INFO_AGE_DEFAULT);
    LOG.debug("{} = {}", GCS_METADATA_CACHE_MAX_INFO_AGE_KEY, cacheMaxInfoAgeMillis);
    optionsBuilder.setCacheMaxInfoAgeMillis(cacheMaxInfoAgeMillis);

    GoogleCloudStorageFileSystemOptions.TimestampUpdatePredicate updatePredicate = ParentTimestampUpdateIncludePredicate
            .create(config);
    optionsBuilder.setShouldIncludeInTimestampUpdatesPredicate(updatePredicate);

    enableAutoRepairImplicitDirectories = config.getBoolean(GCS_ENABLE_REPAIR_IMPLICIT_DIRECTORIES_KEY,
            GCS_ENABLE_REPAIR_IMPLICIT_DIRECTORIES_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_REPAIR_IMPLICIT_DIRECTORIES_KEY, enableAutoRepairImplicitDirectories);

    enableInferImplicitDirectories = config.getBoolean(GCS_ENABLE_INFER_IMPLICIT_DIRECTORIES_KEY,
            GCS_ENABLE_INFER_IMPLICIT_DIRECTORIES_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_INFER_IMPLICIT_DIRECTORIES_KEY, enableInferImplicitDirectories);

    enableFlatGlob = config.getBoolean(GCS_ENABLE_FLAT_GLOB_KEY, GCS_ENABLE_FLAT_GLOB_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_FLAT_GLOB_KEY, enableFlatGlob);

    optionsBuilder.getCloudStorageOptionsBuilder()
            .setAutoRepairImplicitDirectoriesEnabled(enableAutoRepairImplicitDirectories)
            .setInferImplicitDirectoriesEnabled(enableInferImplicitDirectories);

    boolean enableMarkerFileCreation = config.getBoolean(GCS_ENABLE_MARKER_FILE_CREATION_KEY,
            GCS_ENABLE_MARKER_FILE_CREATION_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_MARKER_FILE_CREATION_KEY, enableMarkerFileCreation);
    optionsBuilder.getCloudStorageOptionsBuilder().setCreateMarkerObjects(enableMarkerFileCreation);

    String transportTypeString = config.get(GCS_HTTP_TRANSPORT_KEY, GCS_HTTP_TRANSPORT_DEFAULT);
    String proxyAddress = config.get(GCS_PROXY_ADDRESS_KEY, GCS_PROXY_ADDRESS_DEFAULT);
    HttpTransportFactory.HttpTransportType transportType = HttpTransportFactory
            .getTransportTypeOf(transportTypeString);
    optionsBuilder.getCloudStorageOptionsBuilder().setTransportType(transportType)
            .setProxyAddress(proxyAddress);

    String projectId = ConfigurationUtil.getMandatoryConfig(config, GCS_PROJECT_ID_KEY);
    optionsBuilder.getCloudStorageOptionsBuilder().setProjectId(projectId);

    long maxListItemsPerCall = config.getLong(GCS_MAX_LIST_ITEMS_PER_CALL, GCS_MAX_LIST_ITEMS_PER_CALL_DEFAULT);
    optionsBuilder.getCloudStorageOptionsBuilder().setMaxListItemsPerCall(maxListItemsPerCall);

    // Configuration for setting 250GB upper limit on file size to gain higher write throughput.
    boolean limitFileSizeTo250Gb = config.getBoolean(GCS_FILE_SIZE_LIMIT_250GB,
            GCS_FILE_SIZE_LIMIT_250GB_DEFAULT);
    optionsBuilder.getCloudStorageOptionsBuilder().getWriteChannelOptionsBuilder()
            .setFileSizeLimitedTo250Gb(limitFileSizeTo250Gb);

    // Configuration for setting GoogleCloudStorageWriteChannel upload buffer size.
    int uploadBufferSize = config.getInt(WRITE_BUFFERSIZE_KEY, WRITE_BUFFERSIZE_DEFAULT);
    LOG.debug("{} = {}", WRITE_BUFFERSIZE_KEY, uploadBufferSize);
    optionsBuilder.getCloudStorageOptionsBuilder().getWriteChannelOptionsBuilder()
            .setUploadBufferSize(uploadBufferSize);

    String applicationNameSuffix = config.get(GCS_APPLICATION_NAME_SUFFIX_KEY,
            GCS_APPLICATION_NAME_SUFFIX_DEFAULT);
    LOG.debug("{} = {}", GCS_APPLICATION_NAME_SUFFIX_KEY, applicationNameSuffix);

    String applicationName = GHFS_ID;
    if (!Strings.isNullOrEmpty(applicationNameSuffix)) {
        applicationName = applicationName + applicationNameSuffix;
    }
    LOG.debug("Setting GCS application name to {}", applicationName);
    optionsBuilder.getCloudStorageOptionsBuilder().setAppName(applicationName);

    boolean enablePerformanceCache = config.getBoolean(GCS_ENABLE_PERFORMANCE_CACHE_KEY,
            GCS_ENABLE_PERFORMANCE_CACHE_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_PERFORMANCE_CACHE_KEY, enablePerformanceCache);
    optionsBuilder.setIsPerformanceCacheEnabled(enablePerformanceCache);

    long performanceCacheMaxEntryAgeMillis = config.getLong(GCS_PERFORMANCE_CACHE_MAX_ENTRY_AGE_MILLIS_KEY,
            GCS_PERFORMANCE_CACHE_MAX_ENTRY_AGE_MILLIS_DEFAULT);
    LOG.debug("{} = {}", GCS_PERFORMANCE_CACHE_MAX_ENTRY_AGE_MILLIS_KEY, performanceCacheMaxEntryAgeMillis);

    boolean listCachingEnabled = config.getBoolean(GCS_PERFORMANCE_CACHE_LIST_CACHING_ENABLE_KEY,
            GCS_PERFORMANCE_CACHE_LIST_CACHING_ENABLE_DEFAULT);
    LOG.debug("{} = {}", GCS_PERFORMANCE_CACHE_LIST_CACHING_ENABLE_KEY, listCachingEnabled);

    optionsBuilder.getPerformanceCachingOptionsBuilder().setMaxEntryAgeMillis(performanceCacheMaxEntryAgeMillis)
            .setInferImplicitDirectoriesEnabled(enableInferImplicitDirectories)
            .setListCachingEnabled(listCachingEnabled);

    return optionsBuilder;
}
From source file:com.hortonworks.hbase.replication.bridge.HBaseServer.java
License:Apache License
protected HBaseServer(String bindAddress, int port, Class<? extends Writable> paramClass, int handlerCount,
        int priorityHandlerCount, Configuration conf, String serverName, int highPriorityLevel)
        throws IOException {
    this.bindAddress = bindAddress;
    this.conf = conf;
    this.port = port;
    this.paramClass = paramClass;
    this.handlerCount = handlerCount;
    this.priorityHandlerCount = priorityHandlerCount;
    this.socketSendBufferSize = 0;

    // temporary backward compatibility
    String oldMaxQueueSize = this.conf.get("ipc.server.max.queue.size");
    if (oldMaxQueueSize == null) {
        this.maxQueueLength = this.conf.getInt("ipc.server.max.callqueue.length",
                handlerCount * DEFAULT_MAX_CALLQUEUE_LENGTH_PER_HANDLER);
    } else {
        LOG.warn("ipc.server.max.queue.size was renamed " + "ipc.server.max.callqueue.length, "
                + "please update your configuration");
        // parse the legacy value directly
        this.maxQueueLength = Integer.parseInt(oldMaxQueueSize);
    }

    this.maxQueueSize = this.conf.getInt("ipc.server.max.callqueue.size", DEFAULT_MAX_CALLQUEUE_SIZE);
    this.readThreads = conf.getInt("ipc.server.read.threadpool.size", 10);
    this.callQueue = new LinkedBlockingQueue<Call>(maxQueueLength);
    if (priorityHandlerCount > 0) {
        this.priorityCallQueue = new LinkedBlockingQueue<Call>(maxQueueLength); // TODO hack on size
    } else {
        this.priorityCallQueue = null;
    }
    this.highPriorityLevel = highPriorityLevel;
    this.maxIdleTime = 2 * conf.getInt("ipc.client.connection.maxidletime", 1000);
    this.maxConnectionsToNuke = conf.getInt("ipc.client.kill.max", 10);
    this.thresholdIdleConnections = conf.getInt("ipc.client.idlethreshold", 4000);
    this.purgeTimeout = conf.getLong("ipc.client.call.purge.timeout",
            2 * HConstants.DEFAULT_HBASE_RPC_TIMEOUT);
    this.numOfReplicationHandlers = conf.getInt("hbase.regionserver.replication.handler.count", 3);
    if (numOfReplicationHandlers > 0) {
        this.replicationQueue = new LinkedBlockingQueue<Call>(maxQueueSize);
    }

    // Start the listener here and let it bind to the port
    listener = new Listener();
    this.port = listener.getAddress().getPort();

    this.rpcMetrics = new HBaseRpcMetrics(serverName, Integer.toString(this.port));
    this.tcpNoDelay = conf.getBoolean("ipc.server.tcpnodelay", false);
    this.tcpKeepAlive = conf.getBoolean("ipc.server.tcpkeepalive", true);

    this.warnDelayedCalls = conf.getInt(WARN_DELAYED_CALLS, DEFAULT_WARN_DELAYED_CALLS);
    this.delayedCalls = new AtomicInteger(0);

    this.responseQueuesSizeThrottler = new SizeBasedThrottler(
            conf.getLong(RESPONSE_QUEUES_MAX_SIZE, DEFAULT_RESPONSE_QUEUES_MAX_SIZE));

    // Create the responder here
    responder = new Responder();
}
From source file:com.hp.hpit.cs.MyCombineFileInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() takes precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack " + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node " + minSizeNode
                + " cannot be larger than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Convert them to Paths first. This is a costly operation and
    // we should do it first, otherwise we will incur doing it multiple
    // times, one time each for each pool in the next loop.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        Path p = new Path(paths[i].toUri());
        newpaths.add(p);
    }
    paths = null;

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(job, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedCombineFileInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() takes precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
        // If maxSize is not configured, a single split will be generated per
        // node.
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack " + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node " + minSizeNode
                + " cannot be larger than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    List<FileStatus> stats = listStatus(job);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (stats.size() == 0) {
        return splits;
    }

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<FileStatus> myPaths = new ArrayList<FileStatus>();

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<FileStatus> iter = stats.iterator(); iter.hasNext();) {
            FileStatus p = iter.next();
            if (onepool.accept(p.getPath())) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(job, myPaths, maxSize, minSizeNode, minSizeRack, splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(job, stats, maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}
From source file:com.inclouds.hbase.rowcache.RowCache.java
License:Open Source License
/**
 * Start co-processor - cache.
 *
 * @param cfg
 *            the cfg
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
public void start(Configuration cfg) throws IOException {
    // Get all config from Configuration object
    // Start - load cache
    this.config = cfg;

    synchronized (RowCache.class) {
        if (rowCache != null)
            return;

        final CacheConfiguration ccfg = ConfigHelper.getCacheConfiguration(cfg);
        // set cache name
        ccfg.setCacheName("row-cache");

        long maxMemory = cfg.getLong(ROWCACHE_MAXMEMORY, DEFAULT_MAX_MEMORY);
        ccfg.setMaxMemory(maxMemory);
        LOG.info("[row-cache] Setting max memory to " + maxMemory);

        long maxItems = cfg.getLong(ROWCACHE_MAXITEMS, DEFAULT_MAXITEMS);
        if (maxItems > Integer.MAX_VALUE - 1) {
            maxItems = Integer.MAX_VALUE - 1;
            LOG.warn("[row-cache] Max items is too large " + maxItems);
        } else {
            LOG.info("[row-cache] Setting max items to " + maxItems);
        }

        LOG.info("[row-cache] Direct memory buffer size set to " + StringUtils.byteDesc(RowCache.ioBufferSize));

        ccfg.setBucketNumber((int) maxItems);

        String codecName = cfg.get(ROWCACHE_COMPRESSION, DEFAULT_COMPRESSION);
        ccfg.setCodecType(CodecType.valueOf(codecName.toUpperCase()));
        LOG.info("[row-cache] compression codec=" + codecName);

        isPersistentCache = Boolean.parseBoolean(cfg.get(ROWCACHE_PERSISTENT, DEFAULT_PERSISTENT));
        LOG.info("[row-cache] persistent=" + isPersistentCache);

        String[] dataRoots = getDataRoots(cfg.get(ROWCACHE_CACHE_DATA_ROOTS));
        if (isPersistentCache && dataRoots == null) {
            dataRoots = getHDFSRoots(cfg);
            if (dataRoots == null) {
                LOG.warn("Data roots are not defined for Row Cache. Set persistent mode to false.");
                isPersistentCache = false;
            }
        }

        // TODO - compression
        CacheManager manager = CacheManager.getInstance();
        try {
            if (isPersistentCache) {
                RawFSConfiguration storeConfig = new RawFSConfiguration();
                storeConfig.setDiskStoreImplementation(RawFSStore.class);
                storeConfig.setStoreName(ccfg.getCacheName());
                storeConfig.setDbDataStoreRoots(dataRoots);
                storeConfig.setPersistenceMode(PersistenceMode.ONDEMAND);
                storeConfig.setDbSnapshotInterval(15);
                ccfg.setDataStoreConfiguration(storeConfig);
                // Load cache data
                rowCache = manager.getCache(ccfg, null);
            } else {
                rowCache = manager.getCache(ccfg, new ProgressListener() {

                    @Override
                    public void canceled() {
                        LOG.info("Canceled");
                    }

                    @Override
                    public void error(Throwable t, boolean aborted) {
                        LOG.error("Aborted=" + aborted, t);
                    }

                    @Override
                    public void finished() {
                        LOG.info("Finished loading cache");
                    }

                    @Override
                    public void progress(long done, long total) {
                        LOG.info("Loaded " + done + " out of " + total);
                    }

                    @Override
                    public void started() {
                        LOG.info("Started loading scan cache data from "
                                + ccfg.getDiskStoreConfiguration().getDbDataStoreRoots());
                    }
                });
            }
        } catch (Throwable ex) {
            throw new IOException(ex);
        }

        LOG.info("[row-cache] coprocessor started ");
        RowCache.instance = this;

        Runnable r = new Runnable() {
            public void run() {
                LOG.info("[row-cache] Stats thread started. ");
                while (true) {
                    try {
                        Thread.sleep(STATS_INTERVAL);
                    } catch (InterruptedException e) {
                    }
                    long lastR = lastRequests;
                    long lastH = lastHits;
                    long requests = rowCache.getTotalRequestCount();
                    long hits = rowCache.getHitCount();
                    if (requests != lastRequests) {
                        // Log only if new data
                        LOG.info("[L1-OFFHEAP]: accesses=" + requests + " hits=" + hits + " hitRatio="
                                + ((requests == 0) ? "0.00"
                                        : StringUtils.formatPercent((double) hits / requests, 2) + "%"
                                                + " Last period: accesses=" + (requests - lastR) + " hits="
                                                + (hits - lastH) + " hitRatio="
                                                + (((requests - lastR) == 0) ? "0.00"
                                                        : StringUtils.formatPercent(
                                                                (double) (hits - lastH) / (requests - lastR), 2)))
                                + "%" + " maxMemory=" + StringUtils.byteDesc(rowCache.getMemoryLimit())
                                + " allocatedMemory=" + StringUtils.byteDesc(rowCache.getAllocatedMemorySize())
                                + " freeMemory="
                                + StringUtils.byteDesc(rowCache.getMemoryLimit() - rowCache.getAllocatedMemorySize())
                                + " totalItems=" + rowCache.size() + " evicted=" + rowCache.getEvictedCount());
                        lastRequests = requests;
                        lastHits = hits;
                    }
                }
            }
        };

        statThread = new Thread(r, "BigBaseRowCache.StatisticsThread");
        statThread.start();

        // Register shutdown hook
        registerShutdownHook();
    }
}