List of usage examples for com.google.common.collect.Iterables.partition
public static <T> Iterable<List<T>> partition(final Iterable<T> iterable, final int size)
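Iterables.partition divides an iterable into consecutive sublists of the given size; every sublist is full except possibly the last, and the partitions are produced lazily as the result is iterated. A minimal sketch of the call (the class and variable names here are illustrative, not taken from any of the sources below):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;

import java.util.List;

public class PartitionExample {
    public static void main(String[] args) {
        Iterable<Integer> numbers = ImmutableList.of(1, 2, 3, 4, 5);
        // Partitions are computed lazily while the outer iterable is traversed;
        // a size that does not divide evenly leaves a shorter final sublist.
        for (List<Integer> batch : Iterables.partition(numbers, 2)) {
            System.out.println(batch); // prints [1, 2], then [3, 4], then [5]
        }
    }
}

The real-world examples below all follow the same pattern: pick a batch size, partition, and process each sublist in turn.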
From source file:com.cloudera.nav.sdk.client.MetadataResultIterator.java
public MetadataResultIterator(NavApiCient client, MetadataType type, String query, Integer limit,
        Iterable<String> extractorRunIds) {
    this.client = client;
    this.type = type;
    this.userQuery = query;
    this.limit = limit;
    // Partition the run ids so each generated query covers at most MAX_QUERY_PARTITION_SIZE of them.
    this.partitionRunIdIterator = Iterables.partition(extractorRunIds, MAX_QUERY_PARTITION_SIZE).iterator();
    if (Iterables.isEmpty(extractorRunIds)) {
        nextQuery = userQuery;
    } else {
        getNextQuery();
    }
    getNextBatch();
}
From source file:com.cloudera.science.quince.PrintSamplesTool.java
@Override
public int run(String[] args) throws Exception {
    JCommander jc = new JCommander(this);
    try {
        jc.parse(args);
    } catch (ParameterException e) {
        jc.usage();
        return 1;
    }
    if (paths == null || paths.size() != 1) {
        jc.usage();
        return 1;
    }
    String inputPathString = paths.get(0);
    Configuration conf = getConf();
    Path inputPath = new Path(inputPathString);
    Path[] vcfs = FileUtils.findVcfs(inputPath, conf);
    Set<String> samples = SampleUtils.uniqueSamples(conf, vcfs);
    if (samplesPerLine == 0) { // all on one line
        System.out.println(Joiner.on(',').join(samples));
    } else {
        for (List<String> line : Iterables.partition(samples, samplesPerLine)) {
            System.out.println(Joiner.on(',').join(line));
        }
    }
    return 0;
}
From source file:com.palantir.common.base.BatchingVisitableFromIterable.java
@Override
public <K extends Exception> void batchAcceptSizeHint(int batchSize, ConsistentVisitor<T, K> v) throws K {
    /*
     * Iterables.partition allocates an array of size batchSize, so avoid an OOM by making sure
     * it's not too big.
     */
    batchSize = Math.min(batchSize, MAX_BATCH_SIZE);
    for (List<T> list : Iterables.partition(iterable, batchSize)) {
        if (!v.visit(list)) {
            return;
        }
    }
}
From source file:com.stratio.decision.functions.SaveToMongoActionExecutionFunction.java
@Override
public void process(Iterable<StratioStreamingMessage> messages) throws Exception {
    Integer partitionSize = maxBatchSize;
    if (partitionSize == null || partitionSize <= 0) {
        // A missing or non-positive batch size means everything goes into a single partition.
        partitionSize = Iterables.size(messages);
    }
    Iterable<List<StratioStreamingMessage>> partitionIterables = Iterables.partition(messages, partitionSize);
    try {
        for (List<StratioStreamingMessage> messageList : partitionIterables) {
            // Group the inserts of each partition into one unordered bulk operation per stream.
            Map<String, BulkWriteOperation> elementsToInsert = new HashMap<String, BulkWriteOperation>();
            for (StratioStreamingMessage event : messageList) {
                BasicDBObject object = new BasicDBObject(TIMESTAMP_FIELD, event.getTimestamp());
                for (ColumnNameTypeValue columnNameTypeValue : event.getColumns()) {
                    object.append(columnNameTypeValue.getColumn(), columnNameTypeValue.getValue());
                }
                BulkWriteOperation bulkInsertOperation = elementsToInsert.get(event.getStreamName());
                if (bulkInsertOperation == null) {
                    bulkInsertOperation = getDB().getCollection(event.getStreamName())
                            .initializeUnorderedBulkOperation();
                    elementsToInsert.put(event.getStreamName(), bulkInsertOperation);
                    getDB().getCollection(event.getStreamName())
                            .createIndex(new BasicDBObject(TIMESTAMP_FIELD, -1));
                }
                bulkInsertOperation.insert(object);
            }
            for (Entry<String, BulkWriteOperation> stratioStreamingMessage : elementsToInsert.entrySet()) {
                stratioStreamingMessage.getValue().execute();
            }
        }
    } catch (Exception e) {
        log.error("Error saving in Mongo: " + e.getMessage());
    }
}
From source file:terrastore.util.collect.parallel.ParallelUtils.java
public static <I, O, C extends Collection> C parallelSliceMap(final Collection<I> input, int sliceSize,
        final MapTask<I, O> mapper, final MapCollector<O, C> collector, ExecutorService executor)
        throws ParallelExecutionException {
    try {
        Iterable<List<I>> slices = Iterables.partition(input, sliceSize);
        List<Callable<List<O>>> tasks = new LinkedList<Callable<List<O>>>();
        for (final List<I> slice : slices) {
            tasks.add(new Callable<List<O>>() {
                @Override
                public List<O> call() throws ParallelExecutionException {
                    List<O> outputs = new ArrayList<O>(slice.size());
                    for (I current : slice) {
                        O result = mapper.map(current);
                        if (result != null) {
                            outputs.add(result);
                        }
                    }
                    return outputs;
                }
            });
        }
        List<Future<List<O>>> results = executor.invokeAll(tasks);
        List<O> outputs = new ArrayList<O>(results.size());
        for (Future<List<O>> future : results) {
            List<O> result = future.get();
            if (result != null) {
                outputs.addAll(result);
            }
        }
        return collector.collect(outputs);
    } catch (ExecutionException ex) {
        if (ex.getCause() instanceof ParallelExecutionException) {
            throw (ParallelExecutionException) ex.getCause();
        } else {
            throw new ParallelExecutionException(ex.getCause());
        }
    } catch (InterruptedException ex) {
        throw new ParallelExecutionException(ex.getCause());
    }
}
From source file:org.jclouds.cloudwatch.CloudWatch.java
/**
 * Pushes metrics to CloudWatch.
 *
 * @param cloudWatchApi the {@link CloudWatchApi} to use for the request
 * @param region the region to put the metrics in
 * @param metrics the metrics to publish
 * @param namespace the namespace to publish the metrics in
 */
public static void putMetricData(CloudWatchApi cloudWatchApi, String region, Iterable<MetricDatum> metrics,
        String namespace) {
    MetricApi metricApi = cloudWatchApi.getMetricApiForRegion(region);
    // CloudWatch limits how many data points one request may carry, so publish in slices of 10.
    for (List<MetricDatum> slice : Iterables.partition(metrics, 10)) {
        metricApi.putMetricsInNamespace(slice, namespace);
    }
}
From source file:com.stratio.decision.functions.SaveToCassandraActionExecutionFunction.java
@Override
public void process(Iterable<StratioStreamingMessage> messages) throws Exception {
    Integer partitionSize = maxBatchSize;
    if (partitionSize <= 0) {
        partitionSize = Iterables.size(messages);
    }
    Iterable<List<StratioStreamingMessage>> partitionIterables = Iterables.partition(messages, partitionSize);
    try {
        for (List<StratioStreamingMessage> messageList : partitionIterables) {
            BatchStatement batch = new BatchStatement(batchType);
            for (StratioStreamingMessage stratioStreamingMessage : messageList) {
                Set<String> columns = getColumnSet(stratioStreamingMessage.getColumns());
                if (tablenames.get(stratioStreamingMessage.getStreamName()) == null) {
                    getCassandraTableOperationsService().createTable(stratioStreamingMessage.getStreamName(),
                            stratioStreamingMessage.getColumns(), TIMESTAMP_FIELD);
                    refreshTablenames();
                }
                if (tablenames.get(stratioStreamingMessage.getStreamName()) != columns.hashCode()) {
                    getCassandraTableOperationsService().alterTable(stratioStreamingMessage.getStreamName(),
                            columns, stratioStreamingMessage.getColumns());
                    refreshTablenames();
                }
                batch.add(getCassandraTableOperationsService().createInsertStatement(
                        stratioStreamingMessage.getStreamName(), stratioStreamingMessage.getColumns(),
                        TIMESTAMP_FIELD));
            }
            getSession().execute(batch);
        }
    } catch (Exception e) {
        log.error("Error in Cassandra for batch size {}: {}", Iterables.size(partitionIterables),
                e.getMessage());
    }
}
From source file:org.apache.usergrid.persistence.query.ir.result.InOrderIterator.java
@Override
public void reset() {
    this.iterator = Iterables.partition(uuids, pageSize).iterator();
}
From source file:com.netflix.spinnaker.cats.redis.cache.AbstractRedisCache.java
@Override
public void mergeAll(String type, Collection<CacheData> items) {
    for (List<CacheData> partition : Iterables.partition(items, options.getMaxMergeBatchSize())) {
        mergeItems(type, partition);
    }
}
From source file:com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsViaOutputBufferDoFn.java
@Override
public void processElement(DoFn<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>>.ProcessContext c)
        throws Exception {
    K key = c.element().getKey();

    // Used with Batch, we know that all the data is available for this key. We can't use the
    // timer manager from the context because it doesn't exist. So we create one and emulate the
    // watermark, knowing that we have all data and it is in timestamp order.
    BatchTimerInternals timerInternals = new BatchTimerInternals(Instant.now());

    // It is the responsibility of the user of GroupAlsoByWindowsViaOutputBufferDoFn to only
    // provide a WindowingInternals instance with the appropriate key type for StateInternals.
    @SuppressWarnings("unchecked")
    StateInternals<K> stateInternals = (StateInternals<K>) c.windowingInternals().stateInternals();

    ReduceFnRunner<K, InputT, OutputT, W> reduceFnRunner = new ReduceFnRunner<K, InputT, OutputT, W>(key,
            strategy, stateInternals, timerInternals, c.windowingInternals(), droppedDueToClosedWindow,
            reduceFn, c.getPipelineOptions());

    Iterable<List<WindowedValue<InputT>>> chunks = Iterables.partition(c.element().getValue(), 1000);
    for (Iterable<WindowedValue<InputT>> chunk : chunks) {
        // Process the chunk of elements.
        reduceFnRunner.processElements(chunk);

        // Then, since elements are sorted by their timestamp, advance the input watermark
        // to the first element, and fire any timers that may have been scheduled.
        timerInternals.advanceInputWatermark(reduceFnRunner, chunk.iterator().next().getTimestamp());

        // Fire any processing timers that need to fire
        timerInternals.advanceProcessingTime(reduceFnRunner, Instant.now());

        // Leave the output watermark undefined. Since there's no late data in batch mode
        // there's really no need to track it as we do for streaming.
    }

    // Finish any pending windows by advancing the input watermark to infinity.
    timerInternals.advanceInputWatermark(reduceFnRunner, BoundedWindow.TIMESTAMP_MAX_VALUE);

    // Finally, advance the processing time to infinity to fire any timers.
    timerInternals.advanceProcessingTime(reduceFnRunner, BoundedWindow.TIMESTAMP_MAX_VALUE);

    reduceFnRunner.persist();
}