List of usage examples for java.util.stream.Collectors.groupingBy
public static <T, K, A, D> Collector<T, ?, Map<K, D>> groupingBy(Function<? super T, ? extends K> classifier, Collector<? super T, A, D> downstream)
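This two-argument overload groups stream elements by the key returned by the classifier and then reduces each group with the downstream collector. A minimal self-contained sketch of the idea (the class name and word list are illustrative placeholders, not taken from the examples below):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class GroupingByCountDemo {
    public static void main(String[] args) {
        List<String> words = List.of("apple", "avocado", "banana", "blueberry", "cherry");

        // Classifier: first letter of each word; downstream: count the members of each group.
        Map<Character, Long> countsByInitial = words.stream()
                .collect(Collectors.groupingBy(w -> w.charAt(0), Collectors.counting()));

        System.out.println(countsByInitial); // e.g. {a=2, b=2, c=1} (map iteration order is not guaranteed)
    }
}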
From source file:com.baidu.rigel.biplatform.tesseract.isservice.search.service.impl.CallbackSearchServiceImpl.java
/**
 * @param context
 * @param query
 * @return
 * @throws IndexAndSearchException exception occurred when querying
 */
public SearchIndexResultSet query(QueryContext context, QueryRequest query) throws IndexAndSearchException {
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_BEGIN, "callbackquery",
            "[callbackquery:" + query + "]"));
    if (query == null || context == null || StringUtils.isEmpty(query.getCubeId())) {
        LOGGER.error(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_EXCEPTION, "callbackquery",
                "[callbackquery:" + query + "]"));
        throw new IndexAndSearchException(
                TesseractExceptionUtils.getExceptionMessage(IndexAndSearchException.QUERYEXCEPTION_MESSAGE,
                        IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION),
                IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION);
    }
    // TODO
    if (query.getGroupBy() == null || query.getSelect() == null) {
        return null;
    }
    Map<String, String> requestParams = ((QueryContextAdapter) context).getQuestionModel().getRequestParams();
    // Build query target map
    Map<String, List<MiniCubeMeasure>> callbackMeasures = context.getQueryMeasures().stream()
            .filter(m -> m.getType().equals(MeasureType.CALLBACK)).map(m -> {
                CallbackMeasure tmp = (CallbackMeasure) m;
                for (Map.Entry<String, String> entry : tmp.getCallbackParams().entrySet()) {
                    if (requestParams.containsKey(entry.getKey())) {
                        tmp.getCallbackParams().put(entry.getKey(), requestParams.get(entry.getKey()));
                    }
                }
                return m;
            }).collect(Collectors.groupingBy(c -> ((CallbackMeasure) c).getCallbackUrl(), Collectors.toList()));
    if (callbackMeasures == null || callbackMeasures.isEmpty()) {
        LOGGER.error(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_EXCEPTION, "Empty callback measure",
                "[callbackquery:" + query + "]"));
        throw new IndexAndSearchException(
                TesseractExceptionUtils.getExceptionMessage(IndexAndSearchException.QUERYEXCEPTION_MESSAGE,
                        IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION),
                IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION);
    }
    LOGGER.info("Find callback targets " + callbackMeasures);

    // Keep group-by sequence.
    List<String> groupby = new ArrayList<String>(query.getGroupBy().getGroups());
    LinkedHashMap<String, List<String>> groupbyParams = new LinkedHashMap<String, List<String>>(groupby.size());
    for (String g : groupby) {
        groupbyParams.put(g, new ArrayList<String>());
    }
    LinkedHashMap<String, List<String>> whereParams = new LinkedHashMap<String, List<String>>();
    for (Expression e : query.getWhere().getAndList()) {
        List<String> l = e.getQueryValues().stream().filter(v -> !StringUtils.isEmpty(v.getValue()))
                .map(v -> v.getValue()).collect(Collectors.toList());
        if (groupbyParams.containsKey(e.getProperties())) {
            // if not contains SUMMARY_KEY, add it into group by list
            if (!l.contains(TesseractConstant.SUMMARY_KEY)) {
                l.add(TesseractConstant.SUMMARY_KEY);
            }
            // Put it into group by field
            groupbyParams.get(e.getProperties()).addAll(l);
        } else {
            // Put it into filter field
            if (CollectionUtils.isEmpty(l)) {
                List<Set<String>> tmp = e.getQueryValues().stream().map(v -> v.getLeafValues())
                        .collect(Collectors.toList());
                List<String> values = Lists.newArrayList();
                tmp.forEach(t -> values.addAll(t));
                whereParams.put(e.getProperties(), values);
            } else {
                whereParams.put(e.getProperties(), new ArrayList<String>(l));
            }
        }
    }

    // Prepare query tools
    // CountDownLatch latch = new CountDownLatch(response.size());
    // List<Future<CallbackResponse>> results = Lists.newArrayList();
    Map<CallbackExecutor, Future<CallbackResponse>> results = Maps.newHashMap();
    ExecutorCompletionService<CallbackResponse> service = new ExecutorCompletionService<CallbackResponse>(
            taskExecutor);
    StringBuilder callbackMeasureNames = new StringBuilder();
    for (Entry<String, List<MiniCubeMeasure>> e : callbackMeasures.entrySet()) {
        CallbackExecutor ce = new CallbackExecutor(e, groupbyParams, whereParams);
        results.put(ce, service.submit(ce));
        e.getValue().forEach(m -> {
            callbackMeasureNames.append(" " + m.getCaption() + " ");
        });
    }
    // }
    Map<CallbackExecutor, CallbackResponse> response = new ConcurrentHashMap<CallbackExecutor, CallbackResponse>(
            callbackMeasures.size());
    StringBuffer sb = new StringBuffer();
    results.forEach((k, v) -> {
        try {
            response.put(k, v.get());
        } catch (Exception e1) {
            LOGGER.error(e1.getMessage(), e1);
            sb.append(": " + callbackMeasureNames.toString() + " ??, ?");
        }
    });
    if (!StringUtils.isEmpty(sb.toString())) {
        if (ThreadLocalPlaceholder.getProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY) != null) {
            ThreadLocalPlaceholder.unbindProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY);
        }
        ThreadLocalPlaceholder.bindProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY, sb.toString());
    }

    // Package result
    SqlQuery sqlQuery = QueryRequestUtil.transQueryRequest2SqlQuery(query);
    SearchIndexResultSet result = null;
    if (!response.isEmpty()) {
        result = packageResultRecords(query, sqlQuery, response);
    } else {
        result = new SearchIndexResultSet(new Meta(query.getGroupBy().getGroups().toArray(new String[0])), 0);
    }
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_END, "query", "[query:" + query + "]"));
    return result;
}
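The groupingBy call in the example above buckets callback measures into lists keyed by their callback URL; with Collectors.toList() as the downstream collector it behaves like the single-argument Collectors.groupingBy(classifier). A reduced standalone sketch of that step (the Measure record, a Java 16+ record used for brevity, and its data are made-up placeholders, not part of the project above):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class GroupByUrlSketch {
    // Hypothetical stand-in for CallbackMeasure
    record Measure(String name, String callbackUrl) {}

    public static void main(String[] args) {
        List<Measure> measures = List.of(
                new Measure("m1", "http://svc-a/cb"),
                new Measure("m2", "http://svc-b/cb"),
                new Measure("m3", "http://svc-a/cb"));

        // Group measures by their callback URL, collecting each group into a List.
        Map<String, List<Measure>> byUrl = measures.stream()
                .collect(Collectors.groupingBy(Measure::callbackUrl, Collectors.toList()));

        byUrl.forEach((url, group) -> System.out.println(url + " -> " + group));
    }
}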
From source file:com.thinkbiganalytics.metadata.jobrepo.nifi.provenance.NifiStatsJmsReceiver.java
private void assignNiFiBulletinErrors(List<JpaNifiFeedProcessorStats> stats) {
    //might need to query with the 'after' parameter

    //group the FeedStats by processorId_flowfileId
    Map<String, Map<String, List<JpaNifiFeedProcessorStats>>> processorFlowFilesStats = stats.stream()
            .filter(s -> s.getProcessorId() != null)
            .collect(Collectors.groupingBy(NifiFeedProcessorStats::getProcessorId,
                    Collectors.groupingBy(NifiFeedProcessorStats::getLatestFlowFileId)));

    Set<String> processorIds = processorFlowFilesStats.keySet();
    //strip out those processorIds that are part of a reusable flow
    Set<String> nonReusableFlowProcessorIds = processorIds.stream()
            .filter(processorId -> !provenanceEventFeedUtil.isReusableFlowProcessor(processorId))
            .collect(Collectors.toSet());

    //find all errors for the processors
    List<BulletinDTO> errors = nifiBulletinExceptionExtractor.getErrorBulletinsForProcessorId(processorIds,
            lastBulletinId);

    if (errors != null && !errors.isEmpty()) {
        Set<JpaNifiFeedProcessorStats> statsToUpdate = new HashSet<>();
        // first look for matching feed flow and processor ids. otherwise look for processor id matches that are not part of reusable flows
        errors.stream().forEach(b -> {
            stats.stream().forEach(stat -> {
                if (stat.getLatestFlowFileId() != null && b.getSourceId().equalsIgnoreCase(stat.getProcessorId())
                        && b.getMessage().contains(stat.getLatestFlowFileId())) {
                    stat.setErrorMessageTimestamp(getAdjustBulletinDateTime(b));
                    stat.setErrorMessages(b.getMessage());
                    addFeedProcessorError(stat);
                    statsToUpdate.add(stat);
                } else if (nonReusableFlowProcessorIds.contains(b.getSourceId())
                        && b.getSourceId().equalsIgnoreCase(stat.getProcessorId())) {
                    stat.setErrorMessageTimestamp(getAdjustBulletinDateTime(b));
                    stat.setErrorMessages(b.getMessage());
                    addFeedProcessorError(stat);
                    statsToUpdate.add(stat);
                }
            });
        });
        lastBulletinId = errors.stream().mapToLong(b -> b.getId()).max().getAsLong();

        if (!statsToUpdate.isEmpty()) {
            notifyClusterOfFeedProcessorErrors(statsToUpdate);
            if (persistErrors) {
                nifiEventStatisticsProvider.save(new ArrayList<>(statsToUpdate));
            }
        }
    }
}
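The example above nests one groupingBy inside another to build a two-level map (processor id, then latest flow-file id). A simplified standalone sketch of that nesting (the Stat record and its data are invented stand-ins for JpaNifiFeedProcessorStats):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class NestedGroupingSketch {
    // Hypothetical stand-in for JpaNifiFeedProcessorStats
    record Stat(String processorId, String flowFileId, long count) {}

    public static void main(String[] args) {
        List<Stat> stats = List.of(
                new Stat("p1", "f1", 10),
                new Stat("p1", "f2", 5),
                new Stat("p2", "f3", 7));

        // Outer grouping by processor id, inner grouping by flow-file id.
        Map<String, Map<String, List<Stat>>> byProcessorAndFlowFile = stats.stream()
                .filter(s -> s.processorId() != null)
                .collect(Collectors.groupingBy(Stat::processorId,
                        Collectors.groupingBy(Stat::flowFileId)));

        System.out.println(byProcessorAndFlowFile);
    }
}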
From source file:com.github.horrorho.liquiddonkey.cloud.Looter.java
void snapshot(HttpClient client, Core core, HttpAgent agent, Backup backup, int id)
        throws BadDataException, IOException, InterruptedException {

    boolean toReport = config.debug().toReport();
    Path path = config.file().base().resolve(backup.backupUDID()).resolve(config.file().reportsDirectory());

    Predicate<ICloud.MBSFile> nonUndecryptableFilter = file -> !file.getAttributes().hasEncryptionKey()
            || backup.keyBagManager().fileKey(file) != null;

    // Retrieve file list.
    int limit = config.client().listLimit();
    Snapshot snapshot = agent
            .execute((c, mmeAuthToken) -> Snapshots.from(c, core, mmeAuthToken, backup, id, limit));

    if (snapshot == null) {
        logger.warn("-- snapshot() > snapshot not found: {}", id);
        return;
    }

    ICloud.MBSSnapshotAttributes attr = snapshot.mbsSnapshot().getAttributes();
    logger.info("-- snapshot() > files: {}", snapshot.filesCount());
    std.println();
    std.println(
            "Retrieving snapshot: " + id + " (" + attr.getDeviceName() + " " + attr.getProductVersion() + ")");

    // Total files.
    std.println("Files(total): " + snapshot.filesCount());
    if (toReport) {
        csvWriter.files(sorted(snapshot), path.resolve("snapshot_" + id + "_files.csv"));
    }

    // Mode summary.
    Map<Mode, Long> modes = snapshot.files().stream()
            .collect(Collectors.groupingBy(Mode::mode, Collectors.counting()));
    logger.info("-- snapshot() > modes: {}", modes);

    // Non-empty files filter.
    snapshot = Snapshots.from(snapshot, file -> file.getSize() != 0 && file.hasSignature());
    logger.info("-- snapshot() > filtered non empty, remaining: {}", snapshot.filesCount());
    std.println("Files(non-empty): " + snapshot.filesCount());

    // User filter
    snapshot = Snapshots.from(snapshot, filter);
    logger.info("-- snapshot() > filtered configured, remaining: {}", snapshot.filesCount());
    std.println("Files(filtered): " + snapshot.filesCount());
    if (toReport) {
        csvWriter.files(sorted(snapshot), path.resolve("snapshot_" + id + "_filtered.csv"));
    }

    // Undecryptable filter
    Snapshot undecryptable = Snapshots.from(snapshot, nonUndecryptableFilter.negate());
    snapshot = Snapshots.from(snapshot, nonUndecryptableFilter);
    logger.info("-- snapshot() > filtered undecryptable, remaining: {}", snapshot.filesCount());
    std.println("Files(non-undecryptable): " + snapshot.filesCount());

    // Dump undecryptables
    // Map<ICloud.MBSFile, Outcome> undecryptableOutcomes = undecryptables.stream()
    //         .collect(Collectors.toMap(Function.identity(), file -> Outcome.FAILED_DECRYPT_NO_KEY));
    // outcomesConsumer.accept(undecryptableOutcomes);
    if (toReport) {
        csvWriter.files(sorted(undecryptable), path.resolve("snapshot_" + id + "_undecryptable.csv"));
    }

    // Local filter
    if (config.engine().toForceOverwrite()) {
        logger.debug("-- snapshot() > forced overwrite");
    } else {
        long a = System.currentTimeMillis();
        snapshot = LocalFileFilter.from(snapshot, config.file()).apply(snapshot);
        long b = System.currentTimeMillis();
        logger.info("-- snapshot() > filtered local, remaining: {} delay(ms): {}", snapshot.filesCount(), b - a);
        std.println("Files(non-local): " + snapshot.filesCount());
    }

    if (snapshot.filesCount() == 0) {
        return;
    }

    // Retrieve
    Outcomes outcomes = Outcomes.create();
    OutcomesProgressPercentage progress = OutcomesProgressPercentage.from(snapshot, std);
    Consumer<Map<ICloud.MBSFile, Outcome>> outcomesConsumer = outcomes.andThen(progress);
    std.println();
    std.println("Retrieving: " + Bytes.humanize(progress.totalBytes()));

    // Fetch files
    SnapshotDownloader.from(config.engine(), config.file()).download(agent, core, snapshot, outcomesConsumer);

    std.println();
    std.println("Completed:");
    outcomes.print(std);
    std.println();
}
From source file:com.mycompany.wolf.Room.java
private void notifyWitchSave() {
    ScheduledFuture[] holder = new ScheduledFuture[1];
    holder[0] = scheduledExecutorService.schedule(() -> {
        holder[0].cancel(true);
        notifyHunterKillIfDead();
    }, WITCH_SAVE_DURATION, TimeUnit.MILLISECONDS);

    // Tally the wolves' votes per player and keep the two most-voted players.
    List<Map.Entry<String, Long>> top2 = wolfVotings.values().stream()
            .collect(Collectors.groupingBy(wolfVoting -> wolfVoting.playerId, Collectors.counting()))
            .entrySet().stream()
            .sorted(Comparator.comparingLong((Map.Entry<String, Long> entry) -> entry.getValue()).reversed())
            .limit(2)
            .collect(Collectors.toList());

    if (top2.size() == 1 || (top2.size() > 1 && top2.get(0).getValue().compareTo(top2.get(1).getValue()) > 0)) {
        theVoted = top2.get(0).getKey();
    } else if (top2.size() > 1) {
        theVoted = top2.get(0).getKey();
    } else {
        List<String> undead = sessions.stream()
                .filter(session -> !WOLF.equals(session.getUserProperties().get("role")))
                .map(session -> getPlayerId(session))
                .filter(((Predicate<String>) dead::contains).negate())
                .collect(Collectors.toList());
        theVoted = undead.get(new Random().nextInt(undead.size()));
    }
    newlyDead.add(theVoted);

    // Players poisoned by the witch also join the newly dead.
    witchPoisonings.values().stream().map(witchPoisoning -> witchPoisoning.playerId).forEach(newlyDead::add);

    Map<String, Object> notifyWolfVoted = ImmutableMap.of("code", "notifyWolfVoted", "properties",
            ImmutableMap.of("playerId", theVoted));
    String jsonText = JsonUtils.toString(notifyWolfVoted);
    sessions.stream().forEach(s -> {
        s.getAsyncRemote().sendText(jsonText);
    });
}
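The vote-tallying step above is groupingBy with counting() followed by sorting the resulting entries by count. A reduced standalone sketch of that tally-and-rank pattern (the vote data is made up):

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class VoteTallySketch {
    public static void main(String[] args) {
        List<String> votes = List.of("alice", "bob", "alice", "carol", "bob", "alice");

        // Count votes per player id, then rank entries by count (descending) and keep the top two.
        List<Map.Entry<String, Long>> top2 = votes.stream()
                .collect(Collectors.groupingBy(v -> v, Collectors.counting()))
                .entrySet().stream()
                .sorted(Comparator.comparingLong((Map.Entry<String, Long> e) -> e.getValue()).reversed())
                .limit(2)
                .collect(Collectors.toList());

        System.out.println(top2); // e.g. [alice=3, bob=2]
    }
}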
From source file:nu.yona.server.analysis.service.ActivityService.java
private Map<LocalDate, Set<WeekActivity>> getWeekActivitiesGroupedByDate(UUID userAnonymizedId, Interval interval) {
    Set<WeekActivity> weekActivityEntities = weekActivityRepository.findAll(userAnonymizedId, interval.startDate,
            interval.endDate);
    return weekActivityEntities.stream()
            .collect(Collectors.groupingBy(IntervalActivity::getStartDate, Collectors.toSet()));
}
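The same pattern in miniature: using Collectors.toSet() as the downstream collector yields a Map whose values are Sets rather than Lists. A standalone sketch (the Activity record is a simplified, invented stand-in for WeekActivity):

import java.time.LocalDate;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class GroupToSetSketch {
    // Hypothetical stand-in for WeekActivity
    record Activity(LocalDate startDate, String goal) {}

    public static void main(String[] args) {
        List<Activity> activities = List.of(
                new Activity(LocalDate.of(2024, 1, 1), "news"),
                new Activity(LocalDate.of(2024, 1, 1), "gaming"),
                new Activity(LocalDate.of(2024, 1, 8), "news"));

        // Group activities by start date, collecting each date's activities into a Set.
        Map<LocalDate, Set<Activity>> byDate = activities.stream()
                .collect(Collectors.groupingBy(Activity::startDate, Collectors.toSet()));

        System.out.println(byDate.keySet()); // e.g. [2024-01-01, 2024-01-08] (order not guaranteed)
    }
}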
From source file:com.uber.hoodie.index.bloom.TestHoodieBloomIndex.java
@Test
public void testRangePruning() {
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
    HoodieBloomIndex index = new HoodieBloomIndex(config);

    final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
    partitionToFileIndexInfo.put("2017/10/22",
            Arrays.asList(new BloomIndexFileInfo("f1"), new BloomIndexFileInfo("f2", "000", "000"),
                    new BloomIndexFileInfo("f3", "001", "003"), new BloomIndexFileInfo("f4", "002", "007"),
                    new BloomIndexFileInfo("f5", "009", "010")));

    JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc
            .parallelize(Arrays.asList(new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"),
                    new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/22", "004")))
            .mapToPair(t -> t);

    List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index
            .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();

    assertEquals(10, comparisonKeyList.size());
    Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
            .collect(Collectors.groupingBy(t -> t._2()._2().getRecordKey(),
                    Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));

    assertEquals(4, recordKeyToFileComps.size());
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002"));
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("005"));
}
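The test above combines groupingBy with Collectors.mapping, which transforms each element before it reaches the final collector, so the map values hold only the mapped file names rather than whole tuples. A simplified standalone sketch of that combination (the Comparison record and its pairs are invented):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class GroupAndMapSketch {
    // Hypothetical (recordKey, fileName) comparison pair
    record Comparison(String recordKey, String fileName) {}

    public static void main(String[] args) {
        List<Comparison> comparisons = List.of(
                new Comparison("002", "f1"),
                new Comparison("002", "f3"),
                new Comparison("003", "f1"),
                new Comparison("003", "f4"));

        // Group by record key, mapping each comparison down to just its file name.
        Map<String, List<String>> filesByRecordKey = comparisons.stream()
                .collect(Collectors.groupingBy(Comparison::recordKey,
                        Collectors.mapping(Comparison::fileName, Collectors.toList())));

        System.out.println(filesByRecordKey); // e.g. {002=[f1, f3], 003=[f1, f4]}
    }
}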
From source file:io.dropwizard.sharding.dao.LookupDao.java
/**
 * Queries across various shards and returns the results.
 * <b>Note:</b> This method runs the query serially and is more efficient than scatterGather or a serial get of each key.
 * @param keys The list of lookup keys
 * @return List of elements, or empty if none match
 */
public List<T> get(List<String> keys) {
    Map<Integer, List<String>> lookupKeysGroupByShards = keys.stream()
            .collect(Collectors.groupingBy(key -> ShardCalculator.shardId(shardManager, bucketIdExtractor, key),
                    Collectors.toList()));
    return lookupKeysGroupByShards.keySet().stream().map(shardId -> {
        try {
            DetachedCriteria criteria = DetachedCriteria.forClass(entityClass)
                    .add(Restrictions.in(keyField.getName(), lookupKeysGroupByShards.get(shardId)));
            return Transactions.execute(daos.get(shardId).sessionFactory, true, daos.get(shardId)::select,
                    criteria);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }).flatMap(Collection::stream).collect(Collectors.toList());
}
From source file:com.netflix.spinnaker.clouddriver.titus.client.RegionScopedTitusClient.java
@Override
public Map<String, List<String>> getTaskIdsForJobIds() {
    String filterByStates = "Launched,StartInitiated,Started";
    TaskQuery.Builder taskQueryBuilder = TaskQuery.newBuilder();
    taskQueryBuilder.putFilteringCriteria("attributes", "source:spinnaker")
            .putFilteringCriteria("taskStates", filterByStates)
            .addFields("id")
            .addFields("jobId");
    List<com.netflix.titus.grpc.protogen.Task> grpcTasks = getTasksWithFilter(taskQueryBuilder, 10000);
    return grpcTasks.stream().collect(Collectors.groupingBy(com.netflix.titus.grpc.protogen.Task::getJobId,
            mapping(com.netflix.titus.grpc.protogen.Task::getId, toList())));
}
From source file:com.ikanow.aleph2.graph.titan.utils.TitanGraphBuildingUtils.java
/** Utility to get the vertices in the DB matching the specified keys TODO: move to intermediate utils
 * @param keys
 * @param bucket_filter
 * @return
 */
@SuppressWarnings("unchecked")
public static final Map<JsonNode, List<Vertex>> getGroupedVertices(final Collection<ObjectNode> keys,
        final TitanTransaction tx, final List<String> key_fields, final Predicate<Vertex> vertex_filter) {

    final Stream<TitanVertex> dups = Lambdas.get(() -> {
        final Map<String, Set<Object>> dedup_query_builder = keys.stream()
                .flatMap(j -> Optionals.streamOf(j.fields(), false))
                .collect(Collectors.groupingBy(kv -> kv.getKey(),
                        Collectors.mapping(kv -> jsonNodeToObject(kv.getValue()), Collectors.toSet())));

        //TODO (ALEPH-15): would be nice to support custom "fuzzier" queries, since we're doing a dedup stage to pick the actual winning vertices anyway
        // that way you could say query on tokenized-version of name and get anyone with the same first or last name (say) and then pick the most likely
        // one based on the graph ... of course you'd probably want the full graph for that, so it might end up being better served as a "self-analytic" to do as part
        // of post processing?
        // (NOTE: same remarks apply for edges)
        // (NOTE: currently I've been going in the opposite direction, ie enforcing only one vertex per keyset per bucket ... otherwise it's going to get really
        // confusing when you try to merge all the different versions that Titan creates because of the lack of an upsert function....)

        final TitanGraphQuery<?> matching_nodes_query = dedup_query_builder.entrySet().stream().reduce(
                tx.query(),
                (query, kv) -> query.has(kv.getKey(), Contain.IN, kv.getValue()),
                (query1, query2) -> query1 // (can't occur since reduce is not parallel)
        );

        return Optionals.streamOf(matching_nodes_query.vertices(), false);
    });

    // Remove false positives, un-authorized nodes, and group by key
    final Map<JsonNode, List<Vertex>> grouped_vertices = dups
            .map(vertex -> Tuples._2T((Vertex) vertex, getElementProperties(vertex, key_fields)))
            .filter(vertex_key -> keys.contains(vertex_key._2())) // (remove false positives)
            .filter(vertex_key -> vertex_filter.test(vertex_key._1())) // (remove un-authorized nodes)
            .collect(Collectors.groupingBy(t2 -> (JsonNode) t2._2(), // (group by key)
                    Collectors.mapping(t2 -> t2._1(), Collectors.toList())));

    return grouped_vertices;
}
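The dedup-query construction above uses groupingBy with mapping(..., toSet()) to collapse many field name/value pairs into one Set of distinct values per field name. A reduced standalone sketch of that step, using plain String map entries in place of the Jackson ObjectNode fields (the field data is invented):

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class GroupToValueSetSketch {
    public static void main(String[] args) {
        // (field name, field value) pairs standing in for the ObjectNode fields streamed above.
        List<Map.Entry<String, String>> fields = List.of(
                Map.entry("name", "alice"),
                Map.entry("name", "bob"),
                Map.entry("type", "person"),
                Map.entry("name", "alice")); // duplicate value collapses into the Set

        // One Set of distinct values per field name.
        Map<String, Set<String>> dedupQueryBuilder = fields.stream()
                .collect(Collectors.groupingBy(Map.Entry::getKey,
                        Collectors.mapping(Map.Entry::getValue, Collectors.toSet())));

        System.out.println(dedupQueryBuilder); // e.g. {name=[alice, bob], type=[person]}
    }
}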