Example usage for org.apache.hadoop.conf Configuration getLong

List of usage examples for org.apache.hadoop.conf Configuration getLong

Introduction

This page shows example usage of org.apache.hadoop.conf.Configuration.getLong.

Prototype

public long getLong(String name, long defaultValue) 

Document

Get the value of the name property as a long; if no such property exists, the provided default value is returned.
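
A minimal, self-contained sketch of that behavior (the property name my.example.timeout.ms is a hypothetical placeholder): if the property is present, its value is parsed as a long; otherwise the supplied default is returned.

import org.apache.hadoop.conf.Configuration;

public class GetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Property not set yet: the default value (30000) is returned.
        System.out.println(conf.getLong("my.example.timeout.ms", 30000L));

        // After setting the property, the configured value wins.
        conf.setLong("my.example.timeout.ms", 60000L);
        System.out.println(conf.getLong("my.example.timeout.ms", 30000L)); // 60000
    }
}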

Usage

From source file: org.apache.crunch.kafka.inputformat.KafkaRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    if (!(inputSplit instanceof KafkaInputSplit)) {
        throw new CrunchRuntimeException("InputSplit for RecordReader is not valid split type.");
    }
    Properties kafkaConnectionProperties = filterConnectionProperties(
            getKafkaConnectionProperties(taskAttemptContext.getConfiguration()));

    consumer = new KafkaConsumer<>(kafkaConnectionProperties);
    KafkaInputSplit split = (KafkaInputSplit) inputSplit;
    TopicPartition topicPartition = split.getTopicPartition();

    consumer.assign(Collections.singletonList(topicPartition));

    //suggested hack to gather info without gathering data
    consumer.poll(0);
    //now seek to the desired start location
    startingOffset = split.getStartingOffset();
    consumer.seek(topicPartition, startingOffset);

    currentOffset = startingOffset - 1;
    endingOffset = split.getEndingOffset();

    maxNumberOfRecords = endingOffset - startingOffset;
    if (LOG.isInfoEnabled()) {
        LOG.info("Reading data from {} between {} and {}",
                new Object[] { topicPartition, startingOffset, endingOffset });
    }

    Configuration config = taskAttemptContext.getConfiguration();
    consumerPollTimeout = config.getLong(CONSUMER_POLL_TIMEOUT_KEY, CONSUMER_POLL_TIMEOUT_DEFAULT);
    maxNumberAttempts = config.getInt(KAFKA_RETRY_ATTEMPTS_KEY, KAFKA_RETRY_ATTEMPTS_DEFAULT);
    maxConcurrentEmptyResponses = config.getInt(KAFKA_EMPTY_RETRY_ATTEMPTS_KEY,
            KAFKA_RETRY_EMPTY_ATTEMPTS_DEFAULT);
    concurrentEmptyResponses = 0;
}

From source file: org.apache.crunch.kafka.record.KafkaInputFormat.java

License: Apache License

/**
 * Reads the {@code configuration} to determine which topics, partitions, and offsets should be used for reading data.
 *
 * @param configuration the configuration to derive the data to read.
 * @return a map of {@link TopicPartition} to a pair of start and end offsets.
 * @throws IllegalStateException if the {@code configuration} does not have the start and end offsets set properly
 *                               for a partition.
 */
public static Map<TopicPartition, Pair<Long, Long>> getOffsets(Configuration configuration) {
    Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>();
    //find configuration for all of the topics with defined partitions
    Map<String, String> topicPartitionKeys = configuration.getValByRegex(TOPIC_KEY_REGEX);

    //for each topic, start to process its partitions
    for (String key : topicPartitionKeys.keySet()) {
        String topic = getTopicFromKey(key);
        int[] partitions = configuration.getInts(key);
        //for each partition find and add the start/end offset
        for (int partitionId : partitions) {
            TopicPartition topicPartition = new TopicPartition(topic, partitionId);
            long start = configuration.getLong(generatePartitionStartKey(topic, partitionId), Long.MIN_VALUE);
            long end = configuration.getLong(generatePartitionEndKey(topic, partitionId), Long.MIN_VALUE);

            if (start == Long.MIN_VALUE || end == Long.MIN_VALUE) {
                throw new IllegalStateException("The " + topicPartition + " has an invalid start:" + start
                        + " or end:" + end + " offset configured.");
            }

            offsets.put(topicPartition, Pair.of(start, end));
        }
    }

    return offsets;
}
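
For context, a hedged sketch of how per-partition offset keys might be round-tripped through the Configuration before getOffsets reads them back. The key-building helpers below are hypothetical stand-ins (the real keys come from generatePartitionStartKey and generatePartitionEndKey); the point is the setLong/getLong pairing with Long.MIN_VALUE as the "not configured" sentinel.

import org.apache.hadoop.conf.Configuration;

public class OffsetConfigSketch {
    // Hypothetical key layout, not the real Crunch key format.
    static String startKey(String topic, int partition) {
        return "example.kafka.topic." + topic + ".partition." + partition + ".start";
    }

    static String endKey(String topic, int partition) {
        return "example.kafka.topic." + topic + ".partition." + partition + ".end";
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Writer side: record the offset range for topic "events", partition 0.
        conf.setLong(startKey("events", 0), 100L);
        conf.setLong(endKey("events", 0), 500L);

        // Reader side: Long.MIN_VALUE doubles as the "offset was never set" sentinel.
        long start = conf.getLong(startKey("events", 0), Long.MIN_VALUE);
        long end = conf.getLong(endKey("events", 0), Long.MIN_VALUE);
        if (start == Long.MIN_VALUE || end == Long.MIN_VALUE) {
            throw new IllegalStateException("Offsets not configured for events/0");
        }
        System.out.println("Reading offsets " + start + " to " + end);
    }
}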

From source file: org.apache.crunch.kafka.record.KafkaRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    if (!(inputSplit instanceof KafkaInputSplit)) {
        throw new CrunchRuntimeException("InputSplit for RecordReader is not valid split type.");
    }

    kafkaConnectionProperties = filterConnectionProperties(
            getKafkaConnectionProperties(taskAttemptContext.getConfiguration()));

    consumer = new KafkaConsumer<>(kafkaConnectionProperties);

    KafkaInputSplit split = (KafkaInputSplit) inputSplit;
    topicPartition = split.getTopicPartition();

    consumer.assign(Collections.singletonList(topicPartition));

    //suggested hack to gather info without gathering data
    consumer.poll(0);

    //now seek to the desired start location
    startingOffset = split.getStartingOffset();
    consumer.seek(topicPartition, startingOffset);

    currentOffset = startingOffset - 1;
    endingOffset = split.getEndingOffset();

    maxNumberOfRecords = endingOffset - startingOffset;
    if (LOG.isInfoEnabled()) {
        LOG.info("Reading data from {} between {} and {}",
                new Object[] { topicPartition, startingOffset, endingOffset });
    }

    Configuration config = taskAttemptContext.getConfiguration();
    consumerPollTimeout = config.getLong(CONSUMER_POLL_TIMEOUT_KEY, CONSUMER_POLL_TIMEOUT_DEFAULT);
    maxNumberAttempts = config.getInt(KAFKA_RETRY_ATTEMPTS_KEY, KAFKA_RETRY_ATTEMPTS_DEFAULT);
    maxConcurrentEmptyResponses = config.getInt(KAFKA_EMPTY_RETRY_ATTEMPTS_KEY,
            KAFKA_RETRY_EMPTY_ATTEMPTS_DEFAULT);
    concurrentEmptyResponses = 0;
}

From source file: org.apache.crunch.util.PartitionUtils.java

License: Apache License

public static <T> int getRecommendedPartitions(PCollection<T> pcollection, Configuration conf) {
    long bytesPerTask = conf.getLong(BYTES_PER_REDUCE_TASK, DEFAULT_BYTES_PER_REDUCE_TASK);
    return 1 + (int) (pcollection.getSize() / bytesPerTask);
}

From source file: org.apache.druid.indexer.hadoop.DatasourceInputFormat.java

License: Apache License

public static long getMaxSplitSize(final Configuration conf, final String dataSource) {
    return conf.getLong(StringUtils.format("%s.%s", CONF_MAX_SPLIT_SIZE, dataSource), 0L);
}
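
A short sketch of the same namespacing pattern (the prefix example.input.maxSplitSize is an assumption, not the real CONF_MAX_SPLIT_SIZE value): the data-source name is appended to a shared prefix, and getLong falls back to 0L when no per-source limit is configured.

import org.apache.hadoop.conf.Configuration;

public class MaxSplitSizeSketch {
    // Assumed prefix for illustration only.
    private static final String MAX_SPLIT_SIZE_PREFIX = "example.input.maxSplitSize";

    static long getMaxSplitSize(Configuration conf, String dataSource) {
        return conf.getLong(MAX_SPLIT_SIZE_PREFIX + "." + dataSource, 0L);
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setLong(MAX_SPLIT_SIZE_PREFIX + ".wikipedia", 256L * 1024 * 1024);

        System.out.println(getMaxSplitSize(conf, "wikipedia")); // 268435456
        System.out.println(getMaxSplitSize(conf, "other"));     // 0 -> treated as unset
    }
}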

From source file: org.apache.giraph.conf.LongConfOption.java

License: Apache License

/**
 * Lookup value, use passed in default value if not found.
 *
 * @param conf Configuration
 * @param val default value to use
 * @return set for key, or default value passed in
 */
public long getWithDefault(Configuration conf, long val) {
    return conf.getLong(getKey(), val);
}

From source file: org.apache.giraph.examples.RandomWalkWorkerContext.java

License: Apache License

/**
 * Initialize sources for Random Walk with Restart. The first (preferred) option
 * is a single source given as a command-line parameter. The second option is a
 * file with a list of vertex IDs, one per line; in that case the preference
 * vector is a uniform distribution over these vertices.
 * @param configuration The configuration.
 * @return a (possibly empty) set of source vertices
 */
private ImmutableSet<Long> initializeSources(Configuration configuration) {
    ImmutableSet.Builder<Long> builder = ImmutableSet.builder();
    long sourceVertex = configuration.getLong(SOURCE_VERTEX, Long.MIN_VALUE);
    if (sourceVertex != Long.MIN_VALUE) {
        return ImmutableSet.of(sourceVertex);
    } else {
        Path sourceFile = null;
        try {

            Path[] cacheFiles = DistributedCache.getLocalCacheFiles(configuration);
            if (cacheFiles == null || cacheFiles.length == 0) {
                // empty set if no source vertices configured
                return ImmutableSet.of();
            }

            sourceFile = cacheFiles[0];
            FileSystem fs = FileSystem.getLocal(configuration);
            BufferedReader in = new BufferedReader(
                    new InputStreamReader(fs.open(sourceFile), Charset.defaultCharset()));
            String line;
            while ((line = in.readLine()) != null) {
                builder.add(Long.parseLong(line));
            }
            in.close();
        } catch (IOException e) {
            getContext().setStatus("Could not load local cache files: " + sourceFile);
            LOG.error("Could not load local cache files: " + sourceFile, e);
        }
    }
    return builder.build();
}
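
A hedged sketch of the single-source path above, using a stand-in key name for SOURCE_VERTEX: once the property is set with setLong, getLong returns the configured vertex ID instead of the Long.MIN_VALUE sentinel.

import org.apache.hadoop.conf.Configuration;

public class SingleSourceSketch {
    // Stand-in for the real SOURCE_VERTEX configuration key.
    private static final String SOURCE_VERTEX = "example.randomwalk.source.vertex";

    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // No source configured: the Long.MIN_VALUE sentinel comes back.
        System.out.println(conf.getLong(SOURCE_VERTEX, Long.MIN_VALUE));

        // Configure a single source vertex; the worker context would use just this one.
        conf.setLong(SOURCE_VERTEX, 12L);
        System.out.println(conf.getLong(SOURCE_VERTEX, Long.MIN_VALUE)); // 12
    }
}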

From source file: org.apache.gobblin.runtime.TaskExecutor.java

License: Apache License

/**
 * Constructor to work with Hadoop {@link org.apache.hadoop.conf.Configuration}.
 */
public TaskExecutor(Configuration conf) {
    this(conf.getInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY,
            ConfigurationKeys.DEFAULT_TASK_EXECUTOR_THREADPOOL_SIZE),
            conf.getInt(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_THREAD_POOL_CORE_SIZE),
            conf.getLong(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_INTERVAL_IN_SEC),
            conf.getInt(ConfigurationKeys.QUEUED_TASK_TIME_MAX_SIZE,
                    ConfigurationKeys.DEFAULT_QUEUED_TASK_TIME_MAX_SIZE),
            conf.getLong(ConfigurationKeys.QUEUED_TASK_TIME_MAX_AGE,
                    ConfigurationKeys.DEFAULT_QUEUED_TASK_TIME_MAX_AGE),
            conf.getInt(ConfigurationKeys.METRIC_TIMER_WINDOW_SIZE_IN_MINUTES,
                    ConfigurationKeys.DEFAULT_METRIC_TIMER_WINDOW_SIZE_IN_MINUTES));
}

From source file: org.apache.hadoop.examples.RandomTextWriter.java

License: Apache License

/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        return printUsage();
    }

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = Job.getInstance(conf);

    job.setJarByClass(RandomTextWriter.class);
    job.setJobName("random-text-writer");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
    job.setMapperClass(RandomTextMapper.class);

    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return ret;
}

From source file: org.apache.hadoop.examples.RandomWriter.java

License: Apache License

/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path outDir = new Path(args[0]);
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

    Job job = Job.getInstance(conf);

    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    System.out.println("Running " + numMaps + " maps.");

    // reducer NONE
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

    return ret;
}