Example usage for java.util.stream Collectors groupingBy

Introduction

On this page you can find example usage for java.util.stream Collectors groupingBy.

Prototype

public static <T, K, A, D> Collector<T, ?, Map<K, D>> groupingBy(Function<? super T, ? extends K> classifier,
        Collector<? super T, A, D> downstream) 

Document

Returns a Collector implementing a cascaded "group by" operation on input elements of type T, grouping elements according to a classification function, and then performing a reduction operation on the values associated with a given key using the specified downstream Collector.
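
Before the full examples below, a minimal self-contained sketch of the two-argument form (class name and sample data are invented for illustration): the classifier chooses the map key, and the downstream collector reduces the values in each group.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class GroupingByExample {
    public static void main(String[] args) {
        List<String> words = Arrays.asList("apple", "avocado", "banana", "blueberry", "cherry");

        // Classifier: first character of each word.
        // Downstream: Collectors.counting() reduces each group to its element count.
        Map<Character, Long> countsByInitial = words.stream()
                .collect(Collectors.groupingBy(w -> w.charAt(0), Collectors.counting()));

        // Key order is not guaranteed (the returned Map has no specified implementation).
        System.out.println(countsByInitial); // {a=2, b=2, c=1}
    }
}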

Usage

From source file:com.baidu.rigel.biplatform.tesseract.isservice.search.service.impl.CallbackSearchServiceImpl.java

/**
 * @param context the query context
 * @param query the query request
 * @return the search result set for the callback measures
 * @throws IndexAndSearchException if the arguments are invalid or the query fails
 */
public SearchIndexResultSet query(QueryContext context, QueryRequest query) throws IndexAndSearchException {
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_BEGIN, "callbackquery",
            "[callbackquery:" + query + "]"));
    if (query == null || context == null || StringUtils.isEmpty(query.getCubeId())) {
        LOGGER.error(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_EXCEPTION, "callbackquery",
                "[callbackquery:" + query + "]"));
        throw new IndexAndSearchException(
                TesseractExceptionUtils.getExceptionMessage(IndexAndSearchException.QUERYEXCEPTION_MESSAGE,
                        IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION),
                IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION);
    }
    // TODO: handle queries without group-by or select clauses
    if (query.getGroupBy() == null || query.getSelect() == null) {
        return null;
    }
    Map<String, String> requestParams = ((QueryContextAdapter) context).getQuestionModel().getRequestParams();
    // Build query target map
    Map<String, List<MiniCubeMeasure>> callbackMeasures = context.getQueryMeasures().stream()
            .filter(m -> m.getType().equals(MeasureType.CALLBACK)).map(m -> {
                CallbackMeasure tmp = (CallbackMeasure) m;
                for (Map.Entry<String, String> entry : tmp.getCallbackParams().entrySet()) {
                    if (requestParams.containsKey(entry.getKey())) {
                        tmp.getCallbackParams().put(entry.getKey(), requestParams.get(entry.getKey()));
                    }
                }
                return m;
            }).collect(Collectors.groupingBy(c -> ((CallbackMeasure) c).getCallbackUrl(), Collectors.toList()));
    if (callbackMeasures == null || callbackMeasures.isEmpty()) {
        LOGGER.error(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_EXCEPTION, "Empty callback measure",
                "[callbackquery:" + query + "]"));
        throw new IndexAndSearchException(
                TesseractExceptionUtils.getExceptionMessage(IndexAndSearchException.QUERYEXCEPTION_MESSAGE,
                        IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION),
                IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION);
    }
    LOGGER.info("Find callback targets " + callbackMeasures);

    // Keep group-by sequence.
    List<String> groupby = new ArrayList<String>(query.getGroupBy().getGroups());
    LinkedHashMap<String, List<String>> groupbyParams = new LinkedHashMap<String, List<String>>(groupby.size());
    for (String g : groupby) {
        groupbyParams.put(g, new ArrayList<String>());
    }

    LinkedHashMap<String, List<String>> whereParams = new LinkedHashMap<String, List<String>>();
    for (Expression e : query.getWhere().getAndList()) {
        List<String> l = e.getQueryValues().stream().filter(v -> !StringUtils.isEmpty(v.getValue()))
                .map(v -> v.getValue()).collect(Collectors.toList());
        if (groupbyParams.containsKey(e.getProperties())) {
            // if the values do not contain SUMMARY_KEY, add it to the group-by list
            if (!l.contains(TesseractConstant.SUMMARY_KEY)) {
                l.add(TesseractConstant.SUMMARY_KEY);
            }
            // Put it into group by field
            groupbyParams.get(e.getProperties()).addAll(l);
        } else {
            // Put it into filter field
            if (CollectionUtils.isEmpty(l)) {
                List<Set<String>> tmp = e.getQueryValues().stream().map(v -> v.getLeafValues())
                        .collect(Collectors.toList());
                List<String> values = Lists.newArrayList();
                tmp.forEach(t -> values.addAll(t));
                whereParams.put(e.getProperties(), values);
            } else {
                whereParams.put(e.getProperties(), new ArrayList<String>(l));
            }
        }
    }

    // Prepare query tools
    //        CountDownLatch latch = new CountDownLatch(response.size());
    //        List<Future<CallbackResponse>> results = Lists.newArrayList();
    Map<CallbackExecutor, Future<CallbackResponse>> results = Maps.newHashMap();
    ExecutorCompletionService<CallbackResponse> service = new ExecutorCompletionService<CallbackResponse>(
            taskExecutor);
    StringBuilder callbackMeasureNames = new StringBuilder();
    for (Entry<String, List<MiniCubeMeasure>> e : callbackMeasures.entrySet()) {
        CallbackExecutor ce = new CallbackExecutor(e, groupbyParams, whereParams);
        results.put(ce, service.submit(ce));
        e.getValue().forEach(m -> {
            callbackMeasureNames.append(" " + m.getCaption() + " ");
        });
    }
    //        }
    Map<CallbackExecutor, CallbackResponse> response = new ConcurrentHashMap<CallbackExecutor, CallbackResponse>(
            callbackMeasures.size());
    StringBuffer sb = new StringBuffer();
    results.forEach((k, v) -> {
        try {
            response.put(k, v.get());
        } catch (Exception e1) {
            LOGGER.error(e1.getMessage(), e1);
            sb.append("Callback query failed for measures: " + callbackMeasureNames.toString());
        }
    });
    if (!StringUtils.isEmpty(sb.toString())) {
        if (ThreadLocalPlaceholder.getProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY) != null) {
            ThreadLocalPlaceholder.unbindProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY);
        }
        ThreadLocalPlaceholder.bindProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY, sb.toString());
    }
    // Package result
    SqlQuery sqlQuery = QueryRequestUtil.transQueryRequest2SqlQuery(query);
    SearchIndexResultSet result = null;
    if (!response.isEmpty()) {
        result = packageResultRecords(query, sqlQuery, response);
    } else {
        result = new SearchIndexResultSet(new Meta(query.getGroupBy().getGroups().toArray(new String[0])), 0);
    }

    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_END, "query", "[query:" + query + "]"));
    return result;
}

From source file:com.thinkbiganalytics.metadata.jobrepo.nifi.provenance.NifiStatsJmsReceiver.java

private void assignNiFiBulletinErrors(List<JpaNifiFeedProcessorStats> stats) {

    //might need to query with the 'after' parameter

    //group the FeedStats by processorId_flowfileId

    Map<String, Map<String, List<JpaNifiFeedProcessorStats>>> processorFlowFilesStats = stats.stream()
            .filter(s -> s.getProcessorId() != null)
            .collect(Collectors.groupingBy(NifiFeedProcessorStats::getProcessorId,
                    Collectors.groupingBy(NifiFeedProcessorStats::getLatestFlowFileId)));

    Set<String> processorIds = processorFlowFilesStats.keySet();
    //strip out those processorIds that are part of a reusable flow
    Set<String> nonReusableFlowProcessorIds = processorIds.stream()
            .filter(processorId -> !provenanceEventFeedUtil.isReusableFlowProcessor(processorId))
            .collect(Collectors.toSet());

    //find all errors for the processors
    List<BulletinDTO> errors = nifiBulletinExceptionExtractor.getErrorBulletinsForProcessorId(processorIds,
            lastBulletinId);

    if (errors != null && !errors.isEmpty()) {
        Set<JpaNifiFeedProcessorStats> statsToUpdate = new HashSet<>();
        // first look for matching feed flow and processor ids.  otherwise look for processor id matches that are not part of reusable flows
        errors.stream().forEach(b -> {
            stats.stream().forEach(stat -> {
                if (stat.getLatestFlowFileId() != null
                        && b.getSourceId().equalsIgnoreCase(stat.getProcessorId())
                        && b.getMessage().contains(stat.getLatestFlowFileId())) {
                    stat.setErrorMessageTimestamp(getAdjustBulletinDateTime(b));
                    stat.setErrorMessages(b.getMessage());
                    addFeedProcessorError(stat);
                    statsToUpdate.add(stat);
                } else if (nonReusableFlowProcessorIds.contains(b.getSourceId())
                        && b.getSourceId().equalsIgnoreCase(stat.getProcessorId())) {
                    stat.setErrorMessageTimestamp(getAdjustBulletinDateTime(b));
                    stat.setErrorMessages(b.getMessage());
                    addFeedProcessorError(stat);
                    statsToUpdate.add(stat);
                }
            });
        });
        lastBulletinId = errors.stream().mapToLong(b -> b.getId()).max().getAsLong();

        if (!statsToUpdate.isEmpty()) {
            notifyClusterOfFeedProcessorErrors(statsToUpdate);
            if (persistErrors) {
                nifiEventStatisticsProvider.save(new ArrayList<>(statsToUpdate));
            }
        }
    }
}

From source file:com.github.horrorho.liquiddonkey.cloud.Looter.java

void snapshot(HttpClient client, Core core, HttpAgent agent, Backup backup, int id)
        throws BadDataException, IOException, InterruptedException {

    boolean toReport = config.debug().toReport();
    Path path = config.file().base().resolve(backup.backupUDID()).resolve(config.file().reportsDirectory());
    Predicate<ICloud.MBSFile> nonUndecryptableFilter = file -> !file.getAttributes().hasEncryptionKey()
            || backup.keyBagManager().fileKey(file) != null;

    // Retrieve file list.
    int limit = config.client().listLimit();
    Snapshot snapshot = agent
            .execute((c, mmeAuthToken) -> Snapshots.from(c, core, mmeAuthToken, backup, id, limit));

    if (snapshot == null) {
        logger.warn("-- snapshot() > snapshot not found: {}", id);
        return;
    }
    ICloud.MBSSnapshotAttributes attr = snapshot.mbsSnapshot().getAttributes();
    logger.info("-- snapshot() > files: {}", snapshot.filesCount());
    std.println();
    std.println(
            "Retrieving snapshot: " + id + " (" + attr.getDeviceName() + " " + attr.getProductVersion() + ")");

    // Total files.
    std.println("Files(total): " + snapshot.filesCount());
    if (toReport) {
        csvWriter.files(sorted(snapshot), path.resolve("snapshot_" + id + "_files.csv"));
    }

    // Mode summary.
    Map<Mode, Long> modes = snapshot.files().stream()
            .collect(Collectors.groupingBy(Mode::mode, Collectors.counting()));
    logger.info("-- snapshot() > modes: {}", modes);

    // Non-empty files filter.
    snapshot = Snapshots.from(snapshot, file -> file.getSize() != 0 && file.hasSignature());
    logger.info("-- snapshot() > filtered non empty, remaining: {}", snapshot.filesCount());
    std.println("Files(non-empty): " + snapshot.filesCount());

    // User filter
    snapshot = Snapshots.from(snapshot, filter);
    logger.info("-- snapshot() > filtered configured, remaining: {}", snapshot.filesCount());
    std.println("Files(filtered): " + snapshot.filesCount());
    if (toReport) {
        csvWriter.files(sorted(snapshot), path.resolve("snapshot_" + id + "_filtered.csv"));
    }

    // Undecryptable filter
    Snapshot undecryptable = Snapshots.from(snapshot, nonUndecryptableFilter.negate());
    snapshot = Snapshots.from(snapshot, nonUndecryptableFilter);
    logger.info("-- snapshot() > filtered undecryptable, remaining: {}", snapshot.filesCount());
    std.println("Files(non-undecryptable): " + snapshot.filesCount());

    // Dump undecryptables
    //        Map<ICloud.MBSFile, Outcome> undecryptableOutcomes = undecryptables.stream()
    //                .collect(Collectors.toMap(Function.identity(), file -> Outcome.FAILED_DECRYPT_NO_KEY));
    //        outcomesConsumer.accept(undecryptableOutcomes);
    if (toReport) {
        csvWriter.files(sorted(undecryptable), path.resolve("snapshot_" + id + "_undecryptable.csv"));
    }

    // Local filter
    if (config.engine().toForceOverwrite()) {
        logger.debug("-- snapshot() > forced overwrite");
    } else {
        long a = System.currentTimeMillis();
        snapshot = LocalFileFilter.from(snapshot, config.file()).apply(snapshot);
        long b = System.currentTimeMillis();
        logger.info("-- snapshot() > filtered local, remaining: {} delay(ms): {}", snapshot.filesCount(),
                b - a);
        std.println("Files(non-local): " + snapshot.filesCount());
    }

    if (snapshot.filesCount() == 0) {
        return;
    }

    // Retrieve
    Outcomes outcomes = Outcomes.create();
    OutcomesProgressPercentage progress = OutcomesProgressPercentage.from(snapshot, std);
    Consumer<Map<ICloud.MBSFile, Outcome>> outcomesConsumer = outcomes.andThen(progress);
    std.println();
    std.println("Retrieving: " + Bytes.humanize(progress.totalBytes()));

    // Fetch files
    SnapshotDownloader.from(config.engine(), config.file()).download(agent, core, snapshot, outcomesConsumer);

    std.println();
    std.println("Completed:");
    outcomes.print(std);
    std.println();
}

From source file:com.mycompany.wolf.Room.java

private void notifyWitchSave() {
    ScheduledFuture[] holder = new ScheduledFuture[1];
    holder[0] = scheduledExecutorService.schedule(() -> {
        holder[0].cancel(true);
        notifyHunterKillIfDead();
    }, WITCH_SAVE_DURATION, TimeUnit.MILLISECONDS);

    /*
     * Tally the wolves' votes and decide who is killed tonight.
     */
    List<Map.Entry<String, Long>> top2 = wolfVotings.values().stream()
            .collect(Collectors.groupingBy(wolfVoting -> wolfVoting.playerId, Collectors.counting())).entrySet()
            .stream()
            .sorted(Comparator.comparingLong((Map.Entry<String, Long> entry) -> entry.getValue()).reversed())
            .limit(2).collect(Collectors.toList());
    if (top2.size() == 1 || (top2.size() > 1 && top2.get(0).getValue().compareTo(top2.get(1).getValue()) > 0)) {
        theVoted = top2.get(0).getKey();
    } else if (top2.size() > 1) {
        theVoted = top2.get(0).getKey();
    } else {
        List<String> undead = sessions.stream()
                .filter(session -> !WOLF.equals(session.getUserProperties().get("role")))
                .map(session -> getPlayerId(session)).filter(((Predicate<String>) dead::contains).negate())
                .collect(Collectors.toList());
        theVoted = undead.get(new Random().nextInt(undead.size()));
    }
    newlyDead.add(theVoted);

    /*
     * Players poisoned by the witch also die tonight.
     */
    witchPoisonings.values().stream().map(witchPoisoning -> witchPoisoning.playerId).forEach(newlyDead::add);

    Map<String, Object> notifyWolfVoted = ImmutableMap.of("code", "notifyWolfVoted", "properties",
            ImmutableMap.of("playerId", theVoted));
    String jsonText = JsonUtils.toString(notifyWolfVoted);
    sessions.stream().forEach(s -> {
        s.getAsyncRemote().sendText(jsonText);
    });
}

From source file:nu.yona.server.analysis.service.ActivityService.java

private Map<LocalDate, Set<WeekActivity>> getWeekActivitiesGroupedByDate(UUID userAnonymizedId,
        Interval interval) {
    Set<WeekActivity> weekActivityEntities = weekActivityRepository.findAll(userAnonymizedId,
            interval.startDate, interval.endDate);
    return weekActivityEntities.stream()
            .collect(Collectors.groupingBy(IntervalActivity::getStartDate, Collectors.toSet()));
}

From source file:com.uber.hoodie.index.bloom.TestHoodieBloomIndex.java

@Test
public void testRangePruning() {

    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
    HoodieBloomIndex index = new HoodieBloomIndex(config);

    final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
    partitionToFileIndexInfo.put("2017/10/22",
            Arrays.asList(new BloomIndexFileInfo("f1"), new BloomIndexFileInfo("f2", "000", "000"),
                    new BloomIndexFileInfo("f3", "001", "003"), new BloomIndexFileInfo("f4", "002", "007"),
                    new BloomIndexFileInfo("f5", "009", "010")));

    JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc
            .parallelize(Arrays.asList(new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"),
                    new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/22", "004")))
            .mapToPair(t -> t);

    List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index
            .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();

    assertEquals(10, comparisonKeyList.size());
    Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
            .collect(Collectors.groupingBy(t -> t._2()._2().getRecordKey(),
                    Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));

    assertEquals(4, recordKeyToFileComps.size());
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002"));
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("005"));
}

From source file:io.dropwizard.sharding.dao.LookupDao.java

/**
 * Queries across various shards and returns the results.
 * <b>Note:</b> This method runs the query serially and is more efficient than scatterGather or a serial get of each key
 * @param keys The list of lookup keys
 * @return List of elements or empty if none match
 */
public List<T> get(List<String> keys) {
    Map<Integer, List<String>> lookupKeysGroupByShards = keys.stream().collect(Collectors.groupingBy(
            key -> ShardCalculator.shardId(shardManager, bucketIdExtractor, key), Collectors.toList()));

    return lookupKeysGroupByShards.keySet().stream().map(shardId -> {
        try {
            DetachedCriteria criteria = DetachedCriteria.forClass(entityClass)
                    .add(Restrictions.in(keyField.getName(), lookupKeysGroupByShards.get(shardId)));
            return Transactions.execute(daos.get(shardId).sessionFactory, true, daos.get(shardId)::select,
                    criteria);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }).flatMap(Collection::stream).collect(Collectors.toList());
}

From source file:com.netflix.spinnaker.clouddriver.titus.client.RegionScopedTitusClient.java

@Override
public Map<String, List<String>> getTaskIdsForJobIds() {
    String filterByStates = "Launched,StartInitiated,Started";

    TaskQuery.Builder taskQueryBuilder = TaskQuery.newBuilder();
    taskQueryBuilder.putFilteringCriteria("attributes", "source:spinnaker")
            .putFilteringCriteria("taskStates", filterByStates).addFields("id").addFields("jobId");

    List<com.netflix.titus.grpc.protogen.Task> grpcTasks = getTasksWithFilter(taskQueryBuilder, 10000);
    return grpcTasks.stream().collect(Collectors.groupingBy(com.netflix.titus.grpc.protogen.Task::getJobId,
            mapping(com.netflix.titus.grpc.protogen.Task::getId, toList())));
}

From source file:com.ikanow.aleph2.graph.titan.utils.TitanGraphBuildingUtils.java

/** Utility to get the vertices in the DB matching the specified keys TODO: move to intermediate utils  
 * @param keys
 * @param bucket_filter
 * @return
 */
@SuppressWarnings("unchecked")
public static final Map<JsonNode, List<Vertex>> getGroupedVertices(final Collection<ObjectNode> keys,
        final TitanTransaction tx, final List<String> key_fields, final Predicate<Vertex> vertex_filter) {
    final Stream<TitanVertex> dups = Lambdas.get(() -> {
        final Map<String, Set<Object>> dedup_query_builder = keys.stream()
                .flatMap(j -> Optionals.streamOf(j.fields(), false))
                .collect(Collectors.groupingBy(kv -> kv.getKey(),
                        Collectors.mapping(kv -> jsonNodeToObject(kv.getValue()), Collectors.toSet())));

        //TODO (ALEPH-15): would be nice to support custom "fuzzier" queries, since we're doing a dedup stage to pick the actual winning vertices anyway
        // that way you could say query on tokenized-version of name and get anyone with the same first or last name (say) and then pick the most likely
        // one based on the graph ... of course you'd probably want the full graph for that, so it might end up being better served as a "self-analytic" to do as part
        // of post processing?
        // (NOTE: same remarks apply for edges)
        // (NOTE: currently I've been going in the opposite direction, ie enforcing only one vertex per keyset per bucket ... otherwise it's going to get really 
        //  confusing when you try to merge all the different versions that Titan creates because of the lack of an upsert function....)

        final TitanGraphQuery<?> matching_nodes_query = dedup_query_builder.entrySet().stream().reduce(
                tx.query(), (query, kv) -> query.has(kv.getKey(), Contain.IN, kv.getValue()),
                (query1, query2) -> query1 // (can't occur since reduce not parallel)
        );

        return Optionals.streamOf(matching_nodes_query.vertices(), false);
    });

    // Remove false positives, un-authorized nodes, and group by key

    final Map<JsonNode, List<Vertex>> grouped_vertices = dups
            .map(vertex -> Tuples._2T((Vertex) vertex, getElementProperties(vertex, key_fields)))
            .filter(vertex_key -> keys.contains(vertex_key._2())) // (remove false positives)
            .filter(vertex_key -> vertex_filter.test(vertex_key._1())) // (remove un-authorized nodes)
            .collect(Collectors.groupingBy(t2 -> (JsonNode) t2._2(), // (group by key)
                    Collectors.mapping(t2 -> t2._1(), Collectors.toList())));

    return grouped_vertices;
}