Example usage for org.apache.hadoop.conf Configuration getLong

List of usage examples for org.apache.hadoop.conf Configuration getLong

Introduction

This page lists example usage of org.apache.hadoop.conf.Configuration.getLong, collected from open-source projects.

Prototype

public long getLong(String name, long defaultValue) 

Document

Get the value of the name property as a long. If no such property exists, the provided default value is returned; if the specified value is not a valid long, an error is thrown.
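
A minimal self-contained sketch of the getLong contract (the property names below are made up for illustration):

import org.apache.hadoop.conf.Configuration;

public class GetLongDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Key not set: the supplied default is returned.
        long unset = conf.getLong("example.timeout.ms", 5000L); // 5000

        // Values can be set as a long or as a parseable string.
        conf.setLong("example.timeout.ms", 30000L);
        conf.set("example.retry.count", "7");

        System.out.println(unset);                                    // 5000
        System.out.println(conf.getLong("example.timeout.ms", 0L));   // 30000
        System.out.println(conf.getLong("example.retry.count", 3L));  // 7
    }
}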

Usage

From source file:com.firewallid.util.HTMLContent.java

public HTMLContent(Configuration firewallConf) {
    partitions = firewallConf.getInt(PARTITIONS, 48);
    retries = firewallConf.getInt(RETRIES, 10);
    timeout = firewallConf.getLong(TIMEOUT, 5);
}
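
PARTITIONS, RETRIES and TIMEOUT are key-name constants defined elsewhere in HTMLContent. A hedged sketch of how a caller might populate them before constructing the class, using made-up literal key strings:

Configuration firewallConf = new Configuration();
// Hypothetical key strings; the real names come from the PARTITIONS/RETRIES/TIMEOUT constants.
firewallConf.setInt("firewall.partitions", 96);
firewallConf.setInt("firewall.retries", 5);
firewallConf.setLong("firewall.timeout", 10L);
HTMLContent htmlContent = new HTMLContent(firewallConf);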

From source file:com.fullcontact.sstable.index.SSTableIndexIndex.java

License:Apache License

/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate chunks
 * (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db file.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {

    final Configuration configuration = fileSystem.getConf();

    final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
            HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

    final Closer closer = Closer.create();

    final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
    final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

    boolean success = false;
    try {
        final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

        final TLongArrayList splitOffsets = new TLongArrayList();
        long currentStart = 0;
        long currentEnd = 0;
        final IndexOffsetScanner index = new IndexOffsetScanner(sstablePath, fileSystem);

        while (index.hasNext()) {
            // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
            // This should be good enough since it's only used for sorting splits by size in hadoop land.
            while (currentEnd - currentStart < splitSize && index.hasNext()) {
                currentEnd = index.next();
                splitOffsets.add(currentEnd);
            }

            // Record the split
            final long[] offsets = splitOffsets.toArray();
            os.writeLong(offsets[0]); // Start
            os.writeLong(offsets[offsets.length - 1]); // End

            // Clear the offsets
            splitOffsets.clear();

            if (index.hasNext()) {
                currentStart = index.next();
                currentEnd = currentStart;
                splitOffsets.add(currentStart);
            }
        }

        success = true;
    } finally {
        closer.close();

        if (!success) {
            fileSystem.delete(inProgressOutputPath, false);
        } else {
            fileSystem.rename(inProgressOutputPath, outputPath);
        }
    }
}
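
A hedged usage sketch for writeIndex. The literal split-size property name below is an assumption; in the library it is referenced through HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB and multiplied by 1024 * 1024 to get bytes:

Configuration conf = new Configuration();
// Assumed key string for HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB; the default applies otherwise.
conf.setLong("hadoop.sstable.split.mb", 512L);

FileSystem fs = FileSystem.get(conf);
SSTableIndexIndex.writeIndex(fs, new Path("/data/cassandra/mykeyspace/mytable/mykeyspace-mytable-ic-1-Index.db"));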

From source file:com.google.appengine.tools.mapreduce.RangeInputFormat.java

License:Apache License

private long getNonNegativeLong(Configuration conf, String key) throws IOException {
    long retVal = conf.getLong(key, -1L);
    if (retVal < 0) {
        throw new InvalidConfigurationException("Invalid or nonexistent value for " + key);
    }
    return retVal;
}
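
getLong has no "required key" variant, so the method above uses -1 as a sentinel default: a negative result means the key was absent or set to an illegal value. A minimal sketch of the same pattern with made-up key names:

Configuration conf = new Configuration();
conf.setLong("example.range.shard.count", 16L);

long shards  = conf.getLong("example.range.shard.count", -1L); // 16
long missing = conf.getLong("example.range.start", -1L);       // -1: key absent, caller should reject it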

From source file:com.google.cloud.bigtable.hbase.BigtableOptionsFactory.java

License:Open Source License

public static BigtableOptions fromConfiguration(final Configuration configuration) throws IOException {

    BigtableOptions.Builder bigtableOptionsBuilder = new BigtableOptions.Builder();

    bigtableOptionsBuilder.setProjectId(getValue(configuration, PROJECT_ID_KEY, "Project ID"));
    bigtableOptionsBuilder.setZoneId(getValue(configuration, ZONE_KEY, "Zone"));
    bigtableOptionsBuilder.setClusterId(getValue(configuration, CLUSTER_KEY, "Cluster"));

    bigtableOptionsBuilder
            .setDataHost(getHost(configuration, BIGTABLE_HOST_KEY, BIGTABLE_DATA_HOST_DEFAULT, "API Data"));

    bigtableOptionsBuilder.setTableAdminHost(getHost(configuration, BIGTABLE_TABLE_ADMIN_HOST_KEY,
            BIGTABLE_TABLE_ADMIN_HOST_DEFAULT, "Table Admin"));

    bigtableOptionsBuilder.setClusterAdminHost(getHost(configuration, BIGTABLE_CLUSTER_ADMIN_HOST_KEY,
            BIGTABLE_CLUSTER_ADMIN_HOST_DEFAULT, "Cluster Admin"));

    int port = configuration.getInt(BIGTABLE_PORT_KEY, BIGTABLE_PORT_DEFAULT);
    bigtableOptionsBuilder.setPort(port);
    setChannelOptions(bigtableOptionsBuilder, configuration);

    int asyncMutatorCount = configuration.getInt(BIGTABLE_ASYNC_MUTATOR_COUNT_KEY,
            BIGTABLE_ASYNC_MUTATOR_COUNT_DEFAULT);
    bigtableOptionsBuilder.setAsyncMutatorWorkerCount(asyncMutatorCount);

    bigtableOptionsBuilder.setUseBulkApi(configuration.getBoolean(BIGTABLE_USE_BULK_API, true));
    bigtableOptionsBuilder.setBulkMaxRowKeyCount(configuration.getInt(BIGTABLE_BULK_MAX_ROW_KEY_COUNT,
            BigtableOptions.BIGTABLE_BULK_MAX_ROW_KEY_COUNT_DEFAULT));
    bigtableOptionsBuilder.setBulkMaxRequestSize(configuration.getLong(BIGTABLE_BULK_MAX_REQUEST_SIZE_BYTES,
            BigtableOptions.BIGTABLE_BULK_MAX_REQUEST_SIZE_BYTES_DEFAULT));
    bigtableOptionsBuilder
            .setUsePlaintextNegotiation(configuration.getBoolean(BIGTABLE_USE_PLAINTEXT_NEGOTIATION, false));

    return bigtableOptionsBuilder.build();
}
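
A hedged sketch of driving fromConfiguration. It assumes the key constants referenced above (PROJECT_ID_KEY, ZONE_KEY, CLUSTER_KEY, BIGTABLE_BULK_MAX_REQUEST_SIZE_BYTES) are accessible on BigtableOptionsFactory; the project, zone and cluster values are placeholders:

Configuration conf = HBaseConfiguration.create();
conf.set(BigtableOptionsFactory.PROJECT_ID_KEY, "my-project");
conf.set(BigtableOptionsFactory.ZONE_KEY, "us-central1-b");
conf.set(BigtableOptionsFactory.CLUSTER_KEY, "my-cluster");
// Cap bulk mutation requests at 4 MB instead of the library default.
conf.setLong(BigtableOptionsFactory.BIGTABLE_BULK_MAX_REQUEST_SIZE_BYTES, 4L * 1024 * 1024);

BigtableOptions options = BigtableOptionsFactory.fromConfiguration(conf);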

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

/**
 * Configures GHFS using the supplied configuration.
 *
 * @param config Hadoop configuration object.
 */
private synchronized void configure(Configuration config) throws IOException {
    LOG.debug("GHFS.configure");
    LOG.debug("GHFS_ID = {}", GHFS_ID);

    if (gcsfs == null) {

        copyDeprecatedConfigurationOptions(config);

        Credential credential;
        try {
            credential = HadoopCredentialConfiguration.newBuilder().withConfiguration(config)
                    .withOverridePrefix(AUTHENTICATION_PREFIX).build()
                    .getCredential(CredentialFactory.GCS_SCOPES);
        } catch (GeneralSecurityException gse) {
            throw new IOException(gse);
        }

        GoogleCloudStorageFileSystemOptions.Builder optionsBuilder = createOptionsBuilderFromConfig(config);

        PathCodec pathCodec;
        String specifiedPathCodec = config.get(PATH_CODEC_KEY, PATH_CODEC_DEFAULT).toLowerCase();
        LOG.debug("{} = {}", PATH_CODEC_KEY, specifiedPathCodec);
        if (specifiedPathCodec.equals(PATH_CODEC_USE_LEGACY_ENCODING)) {
            pathCodec = GoogleCloudStorageFileSystem.LEGACY_PATH_CODEC;
        } else if (specifiedPathCodec.equals(PATH_CODEC_USE_URI_ENCODING)) {
            pathCodec = GoogleCloudStorageFileSystem.URI_ENCODED_PATH_CODEC;
        } else {
            pathCodec = GoogleCloudStorageFileSystem.LEGACY_PATH_CODEC;
            LOG.warn("Unknwon path codec specified {}. Using default / legacy.", specifiedPathCodec);
        }
        optionsBuilder.setPathCodec(pathCodec);
        gcsfs = new GoogleCloudStorageFileSystem(credential, optionsBuilder.build());
    }

    bufferSizeOverride = config.getInt(BUFFERSIZE_KEY, BUFFERSIZE_DEFAULT);
    LOG.debug("{} = {}", BUFFERSIZE_KEY, bufferSizeOverride);

    defaultBlockSize = config.getLong(BLOCK_SIZE_KEY, BLOCK_SIZE_DEFAULT);
    LOG.debug("{} = {}", BLOCK_SIZE_KEY, defaultBlockSize);

    String systemBucketName = config.get(GCS_SYSTEM_BUCKET_KEY, null);
    LOG.debug("{} = {}", GCS_SYSTEM_BUCKET_KEY, systemBucketName);

    boolean createSystemBucket = config.getBoolean(GCS_CREATE_SYSTEM_BUCKET_KEY,
            GCS_CREATE_SYSTEM_BUCKET_DEFAULT);
    LOG.debug("{} = {}", GCS_CREATE_SYSTEM_BUCKET_KEY, createSystemBucket);

    reportedPermissions = new FsPermission(
            config.get(PERMISSIONS_TO_REPORT_KEY, PERMISSIONS_TO_REPORT_DEFAULT));
    LOG.debug("{} = {}", PERMISSIONS_TO_REPORT_KEY, reportedPermissions);

    configureBuckets(systemBucketName, createSystemBucket);

    // Set initial working directory to root so that any configured value gets resolved
    // against file system root.
    workingDirectory = getFileSystemRoot();

    Path newWorkingDirectory;
    String configWorkingDirectory = config.get(GCS_WORKING_DIRECTORY_KEY);
    if (Strings.isNullOrEmpty(configWorkingDirectory)) {
        newWorkingDirectory = getDefaultWorkingDirectory();
        LOG.warn("No working directory configured, using default: '{}'", newWorkingDirectory);
    } else {
        newWorkingDirectory = new Path(configWorkingDirectory);
    }

    // Use the public method to ensure proper behavior of normalizing and resolving the new
    // working directory relative to the initial filesystem-root directory.
    setWorkingDirectory(newWorkingDirectory);
    LOG.debug("{} = {}", GCS_WORKING_DIRECTORY_KEY, getWorkingDirectory());

    // Set this configuration as the default config for this instance.
    setConf(config);

    LOG.debug("GHFS.configure: done");
}
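
defaultBlockSize only affects the block size GHFS reports to split calculators, not how GCS actually stores data. A hedged sketch of overriding it, assuming BLOCK_SIZE_KEY resolves to "fs.gs.block.size" and that the GCS connector is registered for the gs:// scheme:

Configuration conf = new Configuration();
// Assumed literal for BLOCK_SIZE_KEY; report 128 MB blocks.
conf.setLong("fs.gs.block.size", 128L * 1024 * 1024);

FileSystem gcs = FileSystem.get(URI.create("gs://my-bucket/"), conf);
System.out.println(gcs.getDefaultBlockSize(new Path("gs://my-bucket/data/part-00000")));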

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

@VisibleForTesting
GoogleCloudStorageFileSystemOptions.Builder createOptionsBuilderFromConfig(Configuration config)
        throws IOException {
    GoogleCloudStorageFileSystemOptions.Builder optionsBuilder = GoogleCloudStorageFileSystemOptions
            .newBuilder();

    boolean enableMetadataCache = config.getBoolean(GCS_ENABLE_METADATA_CACHE_KEY,
            GCS_ENABLE_METADATA_CACHE_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_METADATA_CACHE_KEY, enableMetadataCache);
    optionsBuilder.setIsMetadataCacheEnabled(enableMetadataCache);

    boolean enableBucketDelete = config.getBoolean(GCE_BUCKET_DELETE_ENABLE_KEY,
            GCE_BUCKET_DELETE_ENABLE_DEFAULT);
    LOG.debug("{} = {}", GCE_BUCKET_DELETE_ENABLE_KEY, enableBucketDelete);
    optionsBuilder.setEnableBucketDelete(enableBucketDelete);

    DirectoryListCache.Type cacheType = DirectoryListCache.Type
            .valueOf(config.get(GCS_METADATA_CACHE_TYPE_KEY, GCS_METADATA_CACHE_TYPE_DEFAULT));
    LOG.debug("{} = {}", GCS_METADATA_CACHE_TYPE_KEY, cacheType);
    optionsBuilder.setCacheType(cacheType);

    String cacheBasePath = config.get(GCS_METADATA_CACHE_DIRECTORY_KEY, GCS_METADATA_CACHE_DIRECTORY_DEFAULT);
    LOG.debug("{} = {}", GCS_METADATA_CACHE_DIRECTORY_KEY, cacheBasePath);
    optionsBuilder.setCacheBasePath(cacheBasePath);

    long cacheMaxEntryAgeMillis = config.getLong(GCS_METADATA_CACHE_MAX_ENTRY_AGE_KEY,
            GCS_METADATA_CACHE_MAX_ENTRY_AGE_DEFAULT);
    LOG.debug("{} = {}", GCS_METADATA_CACHE_MAX_ENTRY_AGE_KEY, cacheMaxEntryAgeMillis);
    optionsBuilder.setCacheMaxEntryAgeMillis(cacheMaxEntryAgeMillis);

    long cacheMaxInfoAgeMillis = config.getLong(GCS_METADATA_CACHE_MAX_INFO_AGE_KEY,
            GCS_METADATA_CACHE_MAX_INFO_AGE_DEFAULT);
    LOG.debug("{} = {}", GCS_METADATA_CACHE_MAX_INFO_AGE_KEY, cacheMaxInfoAgeMillis);
    optionsBuilder.setCacheMaxInfoAgeMillis(cacheMaxInfoAgeMillis);

    GoogleCloudStorageFileSystemOptions.TimestampUpdatePredicate updatePredicate = ParentTimestampUpdateIncludePredicate
            .create(config);
    optionsBuilder.setShouldIncludeInTimestampUpdatesPredicate(updatePredicate);

    enableAutoRepairImplicitDirectories = config.getBoolean(GCS_ENABLE_REPAIR_IMPLICIT_DIRECTORIES_KEY,
            GCS_ENABLE_REPAIR_IMPLICIT_DIRECTORIES_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_REPAIR_IMPLICIT_DIRECTORIES_KEY, enableAutoRepairImplicitDirectories);

    enableInferImplicitDirectories = config.getBoolean(GCS_ENABLE_INFER_IMPLICIT_DIRECTORIES_KEY,
            GCS_ENABLE_INFER_IMPLICIT_DIRECTORIES_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_INFER_IMPLICIT_DIRECTORIES_KEY, enableInferImplicitDirectories);

    enableFlatGlob = config.getBoolean(GCS_ENABLE_FLAT_GLOB_KEY, GCS_ENABLE_FLAT_GLOB_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_FLAT_GLOB_KEY, enableFlatGlob);

    optionsBuilder.getCloudStorageOptionsBuilder()
            .setAutoRepairImplicitDirectoriesEnabled(enableAutoRepairImplicitDirectories)
            .setInferImplicitDirectoriesEnabled(enableInferImplicitDirectories);

    boolean enableMarkerFileCreation = config.getBoolean(GCS_ENABLE_MARKER_FILE_CREATION_KEY,
            GCS_ENABLE_MARKER_FILE_CREATION_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_MARKER_FILE_CREATION_KEY, enableMarkerFileCreation);

    optionsBuilder.getCloudStorageOptionsBuilder().setCreateMarkerObjects(enableMarkerFileCreation);

    String transportTypeString = config.get(GCS_HTTP_TRANSPORT_KEY, GCS_HTTP_TRANSPORT_DEFAULT);
    String proxyAddress = config.get(GCS_PROXY_ADDRESS_KEY, GCS_PROXY_ADDRESS_DEFAULT);
    HttpTransportFactory.HttpTransportType transportType = HttpTransportFactory
            .getTransportTypeOf(transportTypeString);

    optionsBuilder.getCloudStorageOptionsBuilder().setTransportType(transportType)
            .setProxyAddress(proxyAddress);

    String projectId = ConfigurationUtil.getMandatoryConfig(config, GCS_PROJECT_ID_KEY);

    optionsBuilder.getCloudStorageOptionsBuilder().setProjectId(projectId);

    long maxListItemsPerCall = config.getLong(GCS_MAX_LIST_ITEMS_PER_CALL, GCS_MAX_LIST_ITEMS_PER_CALL_DEFAULT);

    optionsBuilder.getCloudStorageOptionsBuilder().setMaxListItemsPerCall(maxListItemsPerCall);

    // Configuration for setting 250GB upper limit on file size to gain higher write throughput.
    boolean limitFileSizeTo250Gb = config.getBoolean(GCS_FILE_SIZE_LIMIT_250GB,
            GCS_FILE_SIZE_LIMIT_250GB_DEFAULT);

    optionsBuilder.getCloudStorageOptionsBuilder().getWriteChannelOptionsBuilder()
            .setFileSizeLimitedTo250Gb(limitFileSizeTo250Gb);

    // Configuration for setting GoogleCloudStorageWriteChannel upload buffer size.
    int uploadBufferSize = config.getInt(WRITE_BUFFERSIZE_KEY, WRITE_BUFFERSIZE_DEFAULT);
    LOG.debug("{} = {}", WRITE_BUFFERSIZE_KEY, uploadBufferSize);

    optionsBuilder.getCloudStorageOptionsBuilder().getWriteChannelOptionsBuilder()
            .setUploadBufferSize(uploadBufferSize);

    String applicationNameSuffix = config.get(GCS_APPLICATION_NAME_SUFFIX_KEY,
            GCS_APPLICATION_NAME_SUFFIX_DEFAULT);
    LOG.debug("{} = {}", GCS_APPLICATION_NAME_SUFFIX_KEY, applicationNameSuffix);

    String applicationName = GHFS_ID;
    if (!Strings.isNullOrEmpty(applicationNameSuffix)) {
        applicationName = applicationName + applicationNameSuffix;
    }

    LOG.debug("Setting GCS application name to {}", applicationName);
    optionsBuilder.getCloudStorageOptionsBuilder().setAppName(applicationName);

    boolean enablePerformanceCache = config.getBoolean(GCS_ENABLE_PERFORMANCE_CACHE_KEY,
            GCS_ENABLE_PERFORMANCE_CACHE_DEFAULT);
    LOG.debug("{} = {}", GCS_ENABLE_PERFORMANCE_CACHE_KEY, enablePerformanceCache);
    optionsBuilder.setIsPerformanceCacheEnabled(enablePerformanceCache);

    long performanceCacheMaxEntryAgeMillis = config.getLong(GCS_PERFORMANCE_CACHE_MAX_ENTRY_AGE_MILLIS_KEY,
            GCS_PERFORMANCE_CACHE_MAX_ENTRY_AGE_MILLIS_DEFAULT);
    LOG.debug("{} = {}", GCS_PERFORMANCE_CACHE_MAX_ENTRY_AGE_MILLIS_KEY, performanceCacheMaxEntryAgeMillis);

    boolean listCachingEnabled = config.getBoolean(GCS_PERFORMANCE_CACHE_LIST_CACHING_ENABLE_KEY,
            GCS_PERFORMANCE_CACHE_LIST_CACHING_ENABLE_DEFAULT);
    LOG.debug("{} = {}", GCS_PERFORMANCE_CACHE_LIST_CACHING_ENABLE_KEY, listCachingEnabled);
    optionsBuilder.getPerformanceCachingOptionsBuilder().setMaxEntryAgeMillis(performanceCacheMaxEntryAgeMillis)
            .setInferImplicitDirectoriesEnabled(enableInferImplicitDirectories)
            .setListCachingEnabled(listCachingEnabled);

    return optionsBuilder;
}

From source file:com.hortonworks.hbase.replication.bridge.HBaseServer.java

License:Apache License

protected HBaseServer(String bindAddress, int port, Class<? extends Writable> paramClass, int handlerCount,
        int priorityHandlerCount, Configuration conf, String serverName, int highPriorityLevel)
        throws IOException {
    this.bindAddress = bindAddress;
    this.conf = conf;
    this.port = port;
    this.paramClass = paramClass;
    this.handlerCount = handlerCount;
    this.priorityHandlerCount = priorityHandlerCount;
    this.socketSendBufferSize = 0;

    // temporary backward compatibility
    String oldMaxQueueSize = this.conf.get("ipc.server.max.queue.size");
    if (oldMaxQueueSize == null) {
        this.maxQueueLength = this.conf.getInt("ipc.server.max.callqueue.length",
                handlerCount * DEFAULT_MAX_CALLQUEUE_LENGTH_PER_HANDLER);
    } else {
        LOG.warn("ipc.server.max.queue.size was renamed " + "ipc.server.max.callqueue.length, "
                + "please update your configuration");
        // Parse the deprecated value directly; Integer.getInteger() would look up a system property instead.
        this.maxQueueLength = Integer.parseInt(oldMaxQueueSize);
    }

    this.maxQueueSize = this.conf.getInt("ipc.server.max.callqueue.size", DEFAULT_MAX_CALLQUEUE_SIZE);
    this.readThreads = conf.getInt("ipc.server.read.threadpool.size", 10);
    this.callQueue = new LinkedBlockingQueue<Call>(maxQueueLength);
    if (priorityHandlerCount > 0) {
        this.priorityCallQueue = new LinkedBlockingQueue<Call>(maxQueueLength); // TODO hack on size
    } else {
        this.priorityCallQueue = null;
    }
    this.highPriorityLevel = highPriorityLevel;
    this.maxIdleTime = 2 * conf.getInt("ipc.client.connection.maxidletime", 1000);
    this.maxConnectionsToNuke = conf.getInt("ipc.client.kill.max", 10);
    this.thresholdIdleConnections = conf.getInt("ipc.client.idlethreshold", 4000);
    this.purgeTimeout = conf.getLong("ipc.client.call.purge.timeout", 2 * HConstants.DEFAULT_HBASE_RPC_TIMEOUT);
    this.numOfReplicationHandlers = conf.getInt("hbase.regionserver.replication.handler.count", 3);
    if (numOfReplicationHandlers > 0) {
        this.replicationQueue = new LinkedBlockingQueue<Call>(maxQueueSize);
    }
    // Start the listener here and let it bind to the port
    listener = new Listener();
    this.port = listener.getAddress().getPort();
    this.rpcMetrics = new HBaseRpcMetrics(serverName, Integer.toString(this.port));
    this.tcpNoDelay = conf.getBoolean("ipc.server.tcpnodelay", false);
    this.tcpKeepAlive = conf.getBoolean("ipc.server.tcpkeepalive", true);

    this.warnDelayedCalls = conf.getInt(WARN_DELAYED_CALLS, DEFAULT_WARN_DELAYED_CALLS);
    this.delayedCalls = new AtomicInteger(0);

    this.responseQueuesSizeThrottler = new SizeBasedThrottler(
            conf.getLong(RESPONSE_QUEUES_MAX_SIZE, DEFAULT_RESPONSE_QUEUES_MAX_SIZE));

    // Create the responder here
    responder = new Responder();
}
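
The constructor mixes getInt, getLong and a backward-compatibility branch for a renamed key. A hedged sketch of that renamed-key fallback pattern in isolation, with made-up property names:

static long readPurgeTimeout(Configuration conf) {
    // Made-up key names illustrating the old-name fallback used above.
    String legacy = conf.get("example.rpc.purge.timeout.old");
    if (legacy == null) {
        return conf.getLong("example.rpc.purge.timeout", 2L * 10000);
    }
    LOG.warn("example.rpc.purge.timeout.old was renamed example.rpc.purge.timeout, "
            + "please update your configuration");
    return Long.parseLong(legacy);
}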

From source file:com.hp.hpit.cs.MyCombineFileInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Convert them to Paths first. This is a costly operation and 
    // we should do it first, otherwise we will incur doing it multiple
    // times, one time each for each pool in the next loop.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        Path p = new Path(paths[i].toUri());
        newpaths.add(p);
    }
    paths = null;

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(job, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}
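
A hedged sketch of tuning these split limits from a driver. The max-size property name appears literally above; the per-node and per-rack names are assumed to be the values behind SPLIT_MINSIZE_PERNODE and SPLIT_MINSIZE_PERRACK:

Job job = Job.getInstance(new Configuration(), "combine-split-demo");
Configuration conf = job.getConfiguration();

conf.setLong("mapreduce.input.fileinputformat.split.maxsize", 256L * 1024 * 1024);          // 256 MB cap
conf.setLong("mapreduce.input.fileinputformat.split.minsize.per.node", 64L * 1024 * 1024);  // assumed key
conf.setLong("mapreduce.input.fileinputformat.split.minsize.per.rack", 128L * 1024 * 1024); // assumed key
// A concrete subclass of MyCombineFileInputFormat would then be set as the job's input format class.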

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedCombineFileInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
        // If maxSize is not configured, a single split will be generated per
        // node.
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack " + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node " + minSizeNode
                + " cannot be larger than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    List<FileStatus> stats = listStatus(job);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (stats.size() == 0) {
        return splits;
    }

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<FileStatus> myPaths = new ArrayList<FileStatus>();

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<FileStatus> iter = stats.iterator(); iter.hasNext();) {
            FileStatus p = iter.next();
            if (onepool.accept(p.getPath())) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(job, myPaths, maxSize, minSizeNode, minSizeRack, splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(job, stats, maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}

From source file:com.inclouds.hbase.rowcache.RowCache.java

License:Open Source License

/**
 * Start co-processor - cache.
 * 
 * @param cfg
 *          the cfg
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public void start(Configuration cfg) throws IOException {

    // Get all config from Configuration object
    // Start - load cache

    this.config = cfg;

    synchronized (RowCache.class) {

        if (rowCache != null)
            return;

        final CacheConfiguration ccfg = ConfigHelper.getCacheConfiguration(cfg);
        // set cache name
        ccfg.setCacheName("row-cache");

        long maxMemory = cfg.getLong(ROWCACHE_MAXMEMORY, DEFAULT_MAX_MEMORY);
        ccfg.setMaxMemory(maxMemory);
        LOG.info("[row-cache] Setting max memory to " + maxMemory);
        long maxItems = cfg.getLong(ROWCACHE_MAXITEMS, DEFAULT_MAXITEMS);
        if (maxItems > Integer.MAX_VALUE - 1) {
            LOG.warn("[row-cache] Max items " + maxItems + " is too large, clamping to " + (Integer.MAX_VALUE - 1));
            maxItems = Integer.MAX_VALUE - 1;
        } else {
            LOG.info("[row-cache] Setting max items to " + maxItems);
        }

        LOG.info("[row-cache] Direct memory buffer size set to " + StringUtils.byteDesc(RowCache.ioBufferSize));

        ccfg.setBucketNumber((int) maxItems);
        String codecName = cfg.get(ROWCACHE_COMPRESSION, DEFAULT_COMPRESSION);

        ccfg.setCodecType(CodecType.valueOf(codecName.toUpperCase()));
        LOG.info("[row-cache] compression codec=" + codecName);

        isPersistentCache = Boolean.parseBoolean(cfg.get(ROWCACHE_PERSISTENT, DEFAULT_PERSISTENT));

        LOG.info("[row-cache] persistent=" + isPersistentCache);

        String[] dataRoots = getDataRoots(cfg.get(ROWCACHE_CACHE_DATA_ROOTS));

        if (isPersistentCache && dataRoots == null) {
            dataRoots = getHDFSRoots(cfg);

            if (dataRoots == null) {
                LOG.warn("Data roots are not defined for Row Cache. Set persistent mode to false.");
                isPersistentCache = false;
            }
        }
        // TODO - compression
        CacheManager manager = CacheManager.getInstance();
        try {

            if (isPersistentCache) {
                RawFSConfiguration storeConfig = new RawFSConfiguration();
                storeConfig.setDiskStoreImplementation(RawFSStore.class);
                storeConfig.setStoreName(ccfg.getCacheName());
                storeConfig.setDbDataStoreRoots(dataRoots);
                storeConfig.setPersistenceMode(PersistenceMode.ONDEMAND);
                storeConfig.setDbSnapshotInterval(15);
                ccfg.setDataStoreConfiguration(storeConfig);
                // Load cache data
                rowCache = manager.getCache(ccfg, null);
            } else {

                rowCache = manager.getCache(ccfg, new ProgressListener() {

                    @Override
                    public void canceled() {
                        LOG.info("Canceled");
                    }

                    @Override
                    public void error(Throwable t, boolean aborted) {
                        LOG.error("Aborted=" + aborted, t);
                    }

                    @Override
                    public void finished() {
                        LOG.info("Finished loading cache");
                    }

                    @Override
                    public void progress(long done, long total) {
                        LOG.info("Loaded " + done + " out of " + total);
                    }

                    @Override
                    public void started() {
                        LOG.info("Started loading scan cache data from "
                                + ccfg.getDiskStoreConfiguration().getDbDataStoreRoots());
                    }
                });

            }
        } catch (Throwable ex) {
            throw new IOException(ex);
        }

        LOG.info("[row-cache] coprocessor started ");

        RowCache.instance = this;

        Runnable r = new Runnable() {
            public void run() {
                LOG.info("[row-cache] Stats thread started. ");
                while (true) {
                    try {
                        Thread.sleep(STATS_INTERVAL);
                    } catch (InterruptedException e) {
                    }

                    long lastR = lastRequests;
                    long lastH = lastHits;
                    long requests = rowCache.getTotalRequestCount();
                    long hits = rowCache.getHitCount();
                    if (requests != lastRequests) {
                        // Log only if new data
                        LOG.info("[L1-OFFHEAP]: accesses=" + requests + " hits=" + hits + " hitRatio="
                                + ((requests == 0) ? "0.00"
                                        : StringUtils.formatPercent((double) hits / requests, 2) + "%"
                                                + " Last period: accesses=" + (requests - lastR) + " hits="
                                                + (hits - lastH) + " hitRatio="
                                                + (((requests - lastR) == 0) ? "0.00"
                                                        : StringUtils.formatPercent(
                                                                (double) (hits - lastH) / (requests - lastR),
                                                                2)))
                                + "%" + " maxMemory=" + StringUtils.byteDesc(rowCache.getMemoryLimit())
                                + " allocatedMemory=" + StringUtils.byteDesc(rowCache.getAllocatedMemorySize())
                                + " freeMemory="
                                + StringUtils
                                        .byteDesc(rowCache.getMemoryLimit() - rowCache.getAllocatedMemorySize())
                                + " totalItems=" + rowCache.size() + " evicted=" + rowCache.getEvictedCount());
                        lastRequests = requests;
                        lastHits = hits;
                    }
                }
            }
        };

        statThread = new Thread(r, "BigBaseRowCache.StatisticsThread");
        statThread.start();
        // Register shutdown hook
        registerShutdownHook();
    }
}
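
HBase invokes start(cfg) when the coprocessor is loaded; the memory and item limits come straight from getLong. A hedged sketch of the configuration a cluster operator might supply, with hypothetical key strings standing in for the ROWCACHE_* constants:

Configuration cfg = new Configuration();
// Hypothetical key strings; the real names are the ROWCACHE_MAXMEMORY / ROWCACHE_MAXITEMS /
// ROWCACHE_COMPRESSION constants read in start().
cfg.setLong("rowcache.max.memory", 2L * 1024 * 1024 * 1024); // 2 GB off-heap budget
cfg.setLong("rowcache.max.items", 20_000_000L);              // clamped to Integer.MAX_VALUE - 1 if too large
cfg.set("rowcache.compression", "lz4");                      // parsed via CodecType.valueOf(name.toUpperCase())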