List of usage examples for com.google.common.collect.Iterables.partition
public static <T> Iterable<List<T>> partition(final Iterable<T> iterable, final int size)
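Iterables.partition divides an iterable into consecutive sublists of the given size; every sublist is full except possibly the last, and the partitions are produced lazily as the result is iterated. A minimal sketch of the call (the class and variable names here are illustrative, not taken from any of the sources below):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;

import java.util.List;

public class PartitionExample {
    public static void main(String[] args) {
        Iterable<Integer> numbers = ImmutableList.of(1, 2, 3, 4, 5);
        // Partitions are computed lazily while the outer iterable is traversed;
        // a size that does not divide evenly leaves a shorter final sublist.
        for (List<Integer> batch : Iterables.partition(numbers, 2)) {
            System.out.println(batch); // prints [1, 2], then [3, 4], then [5]
        }
    }
}

The real-world examples below all follow the same pattern: pick a batch size, partition, and process each sublist in turn.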
From source file:com.cloudera.nav.sdk.client.MetadataResultIterator.java
public MetadataResultIterator(NavApiCient client, MetadataType type, String query, Integer limit,
        Iterable<String> extractorRunIds) {
    this.client = client;
    this.type = type;
    this.userQuery = query;
    this.limit = limit;
    // Partition the run ids so each generated query covers at most MAX_QUERY_PARTITION_SIZE of them.
    this.partitionRunIdIterator = Iterables.partition(extractorRunIds, MAX_QUERY_PARTITION_SIZE).iterator();
    if (Iterables.isEmpty(extractorRunIds)) {
        nextQuery = userQuery;
    } else {
        getNextQuery();
    }
    getNextBatch();
}
From source file:com.cloudera.science.quince.PrintSamplesTool.java
@Override
public int run(String[] args) throws Exception {
    JCommander jc = new JCommander(this);
    try {
        jc.parse(args);
    } catch (ParameterException e) {
        jc.usage();
        return 1;
    }
    if (paths == null || paths.size() != 1) {
        jc.usage();
        return 1;
    }
    String inputPathString = paths.get(0);
    Configuration conf = getConf();
    Path inputPath = new Path(inputPathString);
    Path[] vcfs = FileUtils.findVcfs(inputPath, conf);
    Set<String> samples = SampleUtils.uniqueSamples(conf, vcfs);
    if (samplesPerLine == 0) { // all on one line
        System.out.println(Joiner.on(',').join(samples));
    } else {
        for (List<String> line : Iterables.partition(samples, samplesPerLine)) {
            System.out.println(Joiner.on(',').join(line));
        }
    }
    return 0;
}
From source file:com.palantir.common.base.BatchingVisitableFromIterable.java
@Override
public <K extends Exception> void batchAcceptSizeHint(int batchSize, ConsistentVisitor<T, K> v) throws K {
    /*
     * Iterables.partition allocates an array of size batchSize, so avoid an OOM by making sure
     * it's not too big.
     */
    batchSize = Math.min(batchSize, MAX_BATCH_SIZE);
    for (List<T> list : Iterables.partition(iterable, batchSize)) {
        if (!v.visit(list)) {
            return;
        }
    }
}
From source file:com.stratio.decision.functions.SaveToMongoActionExecutionFunction.java
@Override
public void process(Iterable<StratioStreamingMessage> messages) throws Exception {
    Integer partitionSize = maxBatchSize;
    if (partitionSize == null || partitionSize <= 0) {
        // A missing or non-positive batch size means everything goes into a single partition.
        partitionSize = Iterables.size(messages);
    }
    Iterable<List<StratioStreamingMessage>> partitionIterables = Iterables.partition(messages, partitionSize);
    try {
        for (List<StratioStreamingMessage> messageList : partitionIterables) {
            // Group the inserts of each partition into one unordered bulk operation per stream.
            Map<String, BulkWriteOperation> elementsToInsert = new HashMap<String, BulkWriteOperation>();
            for (StratioStreamingMessage event : messageList) {
                BasicDBObject object = new BasicDBObject(TIMESTAMP_FIELD, event.getTimestamp());
                for (ColumnNameTypeValue columnNameTypeValue : event.getColumns()) {
                    object.append(columnNameTypeValue.getColumn(), columnNameTypeValue.getValue());
                }
                BulkWriteOperation bulkInsertOperation = elementsToInsert.get(event.getStreamName());
                if (bulkInsertOperation == null) {
                    bulkInsertOperation = getDB().getCollection(event.getStreamName())
                            .initializeUnorderedBulkOperation();
                    elementsToInsert.put(event.getStreamName(), bulkInsertOperation);
                    getDB().getCollection(event.getStreamName())
                            .createIndex(new BasicDBObject(TIMESTAMP_FIELD, -1));
                }
                bulkInsertOperation.insert(object);
            }
            for (Entry<String, BulkWriteOperation> stratioStreamingMessage : elementsToInsert.entrySet()) {
                stratioStreamingMessage.getValue().execute();
            }
        }
    } catch (Exception e) {
        log.error("Error saving in Mongo: " + e.getMessage());
    }
}
From source file:terrastore.util.collect.parallel.ParallelUtils.java
public static <I, O, C extends Collection> C parallelSliceMap(final Collection<I> input, int sliceSize,
        final MapTask<I, O> mapper, final MapCollector<O, C> collector, ExecutorService executor)
        throws ParallelExecutionException {
    try {
        Iterable<List<I>> slices = Iterables.partition(input, sliceSize);
        List<Callable<List<O>>> tasks = new LinkedList<Callable<List<O>>>();
        for (final List<I> slice : slices) {
            tasks.add(new Callable<List<O>>() {
                @Override
                public List<O> call() throws ParallelExecutionException {
                    List<O> outputs = new ArrayList<O>(slice.size());
                    for (I current : slice) {
                        O result = mapper.map(current);
                        if (result != null) {
                            outputs.add(result);
                        }
                    }
                    return outputs;
                }
            });
        }
        List<Future<List<O>>> results = executor.invokeAll(tasks);
        List<O> outputs = new ArrayList<O>(results.size());
        for (Future<List<O>> future : results) {
            List<O> result = future.get();
            if (result != null) {
                outputs.addAll(result);
            }
        }
        return collector.collect(outputs);
    } catch (ExecutionException ex) {
        if (ex.getCause() instanceof ParallelExecutionException) {
            throw (ParallelExecutionException) ex.getCause();
        } else {
            throw new ParallelExecutionException(ex.getCause());
        }
    } catch (InterruptedException ex) {
        throw new ParallelExecutionException(ex.getCause());
    }
}
From source file:org.jclouds.cloudwatch.CloudWatch.java
/**
 * Pushes metrics to CloudWatch.
 *
 * @param cloudWatchApi the {@link CloudWatchApi} to use for the request
 * @param region the region to put the metrics in
 * @param metrics the metrics to publish
 * @param namespace the namespace to publish the metrics in
 */
public static void putMetricData(CloudWatchApi cloudWatchApi, String region, Iterable<MetricDatum> metrics,
        String namespace) {
    MetricApi metricApi = cloudWatchApi.getMetricApiForRegion(region);
    // CloudWatch limits how many data points one request may carry, so publish in slices of 10.
    for (List<MetricDatum> slice : Iterables.partition(metrics, 10)) {
        metricApi.putMetricsInNamespace(slice, namespace);
    }
}
From source file:com.stratio.decision.functions.SaveToCassandraActionExecutionFunction.java
@Override
public void process(Iterable<StratioStreamingMessage> messages) throws Exception {
    Integer partitionSize = maxBatchSize;
    if (partitionSize <= 0) {
        partitionSize = Iterables.size(messages);
    }
    Iterable<List<StratioStreamingMessage>> partitionIterables = Iterables.partition(messages, partitionSize);
    try {
        for (List<StratioStreamingMessage> messageList : partitionIterables) {
            BatchStatement batch = new BatchStatement(batchType);
            for (StratioStreamingMessage stratioStreamingMessage : messageList) {
                Set<String> columns = getColumnSet(stratioStreamingMessage.getColumns());
                if (tablenames.get(stratioStreamingMessage.getStreamName()) == null) {
                    getCassandraTableOperationsService().createTable(stratioStreamingMessage.getStreamName(),
                            stratioStreamingMessage.getColumns(), TIMESTAMP_FIELD);
                    refreshTablenames();
                }
                if (tablenames.get(stratioStreamingMessage.getStreamName()) != columns.hashCode()) {
                    getCassandraTableOperationsService().alterTable(stratioStreamingMessage.getStreamName(),
                            columns, stratioStreamingMessage.getColumns());
                    refreshTablenames();
                }
                batch.add(getCassandraTableOperationsService().createInsertStatement(
                        stratioStreamingMessage.getStreamName(), stratioStreamingMessage.getColumns(),
                        TIMESTAMP_FIELD));
            }
            getSession().execute(batch);
        }
    } catch (Exception e) {
        log.error("Error in Cassandra for batch size {}: {}", Iterables.size(partitionIterables),
                e.getMessage());
    }
}
From source file:org.apache.usergrid.persistence.query.ir.result.InOrderIterator.java
@Override
public void reset() {
    this.iterator = Iterables.partition(uuids, pageSize).iterator();
}
From source file:com.netflix.spinnaker.cats.redis.cache.AbstractRedisCache.java
@Override
public void mergeAll(String type, Collection<CacheData> items) {
    for (List<CacheData> partition : Iterables.partition(items, options.getMaxMergeBatchSize())) {
        mergeItems(type, partition);
    }
}
From source file:com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsViaOutputBufferDoFn.java
@Override
public void processElement(DoFn<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>>.ProcessContext c)
        throws Exception {
    K key = c.element().getKey();

    // Used with Batch, we know that all the data is available for this key. We can't use the
    // timer manager from the context because it doesn't exist. So we create one and emulate the
    // watermark, knowing that we have all data and it is in timestamp order.
    BatchTimerInternals timerInternals = new BatchTimerInternals(Instant.now());

    // It is the responsibility of the user of GroupAlsoByWindowsViaOutputBufferDoFn to only
    // provide a WindowingInternals instance with the appropriate key type for StateInternals.
    @SuppressWarnings("unchecked")
    StateInternals<K> stateInternals = (StateInternals<K>) c.windowingInternals().stateInternals();

    ReduceFnRunner<K, InputT, OutputT, W> reduceFnRunner = new ReduceFnRunner<K, InputT, OutputT, W>(key,
            strategy, stateInternals, timerInternals, c.windowingInternals(), droppedDueToClosedWindow,
            reduceFn, c.getPipelineOptions());

    Iterable<List<WindowedValue<InputT>>> chunks = Iterables.partition(c.element().getValue(), 1000);
    for (Iterable<WindowedValue<InputT>> chunk : chunks) {
        // Process the chunk of elements.
        reduceFnRunner.processElements(chunk);

        // Then, since elements are sorted by their timestamp, advance the input watermark
        // to the first element, and fire any timers that may have been scheduled.
        timerInternals.advanceInputWatermark(reduceFnRunner, chunk.iterator().next().getTimestamp());

        // Fire any processing timers that need to fire
        timerInternals.advanceProcessingTime(reduceFnRunner, Instant.now());

        // Leave the output watermark undefined. Since there's no late data in batch mode
        // there's really no need to track it as we do for streaming.
    }

    // Finish any pending windows by advancing the input watermark to infinity.
    timerInternals.advanceInputWatermark(reduceFnRunner, BoundedWindow.TIMESTAMP_MAX_VALUE);

    // Finally, advance the processing time to infinity to fire any timers.
    timerInternals.advanceProcessingTime(reduceFnRunner, BoundedWindow.TIMESTAMP_MAX_VALUE);

    reduceFnRunner.persist();
}