Example usage for java.util.stream Collectors groupingBy

Introduction

On this page you can find example usage for java.util.stream Collectors groupingBy.

Prototype

public static <T, K, A, D> Collector<T, ?, Map<K, D>> groupingBy(Function<? super T, ? extends K> classifier,
        Collector<? super T, A, D> downstream) 

Document

Returns a Collector implementing a cascaded "group by" operation on input elements of type T, grouping elements according to a classification function, and then performing a reduction operation on the values associated with a given key using the specified downstream Collector.
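
Before the full examples below, a minimal self-contained sketch of the two-argument form (class name and sample data are invented for illustration): the classifier chooses the map key, and the downstream collector reduces the values in each group.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class GroupingByExample {
    public static void main(String[] args) {
        List<String> words = Arrays.asList("apple", "avocado", "banana", "blueberry", "cherry");

        // Classifier: first character of each word.
        // Downstream: Collectors.counting() reduces each group to its element count.
        Map<Character, Long> countsByInitial = words.stream()
                .collect(Collectors.groupingBy(w -> w.charAt(0), Collectors.counting()));

        // Key order is not guaranteed (the returned Map has no specified implementation).
        System.out.println(countsByInitial); // {a=2, b=2, c=1}
    }
}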

Usage

From source file:com.baidu.rigel.biplatform.tesseract.isservice.search.service.impl.CallbackSearchServiceImpl.java

/**
 * @param context the query context
 * @param query the query request
 * @return the search result set for the callback measures
 * @throws IndexAndSearchException if the arguments are invalid or the query fails
 */
public SearchIndexResultSet query(QueryContext context, QueryRequest query) throws IndexAndSearchException {
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_BEGIN, "callbackquery",
            "[callbackquery:" + query + "]"));
    if (query == null || context == null || StringUtils.isEmpty(query.getCubeId())) {
        LOGGER.error(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_EXCEPTION, "callbackquery",
                "[callbackquery:" + query + "]"));
        throw new IndexAndSearchException(
                TesseractExceptionUtils.getExceptionMessage(IndexAndSearchException.QUERYEXCEPTION_MESSAGE,
                        IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION),
                IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION);
    }
    // TODO: handle queries without group-by or select clauses
    if (query.getGroupBy() == null || query.getSelect() == null) {
        return null;
    }
    Map<String, String> requestParams = ((QueryContextAdapter) context).getQuestionModel().getRequestParams();
    // Build query target map
    Map<String, List<MiniCubeMeasure>> callbackMeasures = context.getQueryMeasures().stream()
            .filter(m -> m.getType().equals(MeasureType.CALLBACK)).map(m -> {
                CallbackMeasure tmp = (CallbackMeasure) m;
                for (Map.Entry<String, String> entry : tmp.getCallbackParams().entrySet()) {
                    if (requestParams.containsKey(entry.getKey())) {
                        tmp.getCallbackParams().put(entry.getKey(), requestParams.get(entry.getKey()));
                    }
                }
                return m;
            }).collect(Collectors.groupingBy(c -> ((CallbackMeasure) c).getCallbackUrl(), Collectors.toList()));
    if (callbackMeasures == null || callbackMeasures.isEmpty()) {
        LOGGER.error(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_EXCEPTION, "Empty callback measure",
                "[callbackquery:" + query + "]"));
        throw new IndexAndSearchException(
                TesseractExceptionUtils.getExceptionMessage(IndexAndSearchException.QUERYEXCEPTION_MESSAGE,
                        IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION),
                IndexAndSearchExceptionType.ILLEGALARGUMENT_EXCEPTION);
    }
    LOGGER.info("Find callback targets " + callbackMeasures);

    // Keep group-by sequence.
    List<String> groupby = new ArrayList<String>(query.getGroupBy().getGroups());
    LinkedHashMap<String, List<String>> groupbyParams = new LinkedHashMap<String, List<String>>(groupby.size());
    for (String g : groupby) {
        groupbyParams.put(g, new ArrayList<String>());
    }

    LinkedHashMap<String, List<String>> whereParams = new LinkedHashMap<String, List<String>>();
    for (Expression e : query.getWhere().getAndList()) {
        List<String> l = e.getQueryValues().stream().filter(v -> !StringUtils.isEmpty(v.getValue()))
                .map(v -> v.getValue()).collect(Collectors.toList());
        if (groupbyParams.containsKey(e.getProperties())) {
            // if the values do not contain SUMMARY_KEY, add it to the group-by list
            if (!l.contains(TesseractConstant.SUMMARY_KEY)) {
                l.add(TesseractConstant.SUMMARY_KEY);
            }
            // Put it into group by field
            groupbyParams.get(e.getProperties()).addAll(l);
        } else {
            // Put it into filter field
            if (CollectionUtils.isEmpty(l)) {
                List<Set<String>> tmp = e.getQueryValues().stream().map(v -> v.getLeafValues())
                        .collect(Collectors.toList());
                List<String> values = Lists.newArrayList();
                tmp.forEach(t -> values.addAll(t));
                whereParams.put(e.getProperties(), values);
            } else {
                whereParams.put(e.getProperties(), new ArrayList<String>(l));
            }
        }
    }

    // Prepare query tools
    //        CountDownLatch latch = new CountDownLatch(response.size());
    //        List<Future<CallbackResponse>> results = Lists.newArrayList();
    Map<CallbackExecutor, Future<CallbackResponse>> results = Maps.newHashMap();
    ExecutorCompletionService<CallbackResponse> service = new ExecutorCompletionService<CallbackResponse>(
            taskExecutor);
    StringBuilder callbackMeasureNames = new StringBuilder();
    for (Entry<String, List<MiniCubeMeasure>> e : callbackMeasures.entrySet()) {
        CallbackExecutor ce = new CallbackExecutor(e, groupbyParams, whereParams);
        results.put(ce, service.submit(ce));
        e.getValue().forEach(m -> {
            callbackMeasureNames.append(" " + m.getCaption() + " ");
        });
    }
    //        }
    Map<CallbackExecutor, CallbackResponse> response = new ConcurrentHashMap<CallbackExecutor, CallbackResponse>(
            callbackMeasures.size());
    StringBuffer sb = new StringBuffer();
    results.forEach((k, v) -> {
        try {
            response.put(k, v.get());
        } catch (Exception e1) {
            LOGGER.error(e1.getMessage(), e1);
            sb.append("Callback query failed for measures: " + callbackMeasureNames.toString());
        }
    });
    if (!StringUtils.isEmpty(sb.toString())) {
        if (ThreadLocalPlaceholder.getProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY) != null) {
            ThreadLocalPlaceholder.unbindProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY);
        }
        ThreadLocalPlaceholder.bindProperty(ThreadLocalPlaceholder.ERROR_MSG_KEY, sb.toString());
    }
    // Package result
    SqlQuery sqlQuery = QueryRequestUtil.transQueryRequest2SqlQuery(query);
    SearchIndexResultSet result = null;
    if (!response.isEmpty()) {
        result = packageResultRecords(query, sqlQuery, response);
    } else {
        result = new SearchIndexResultSet(new Meta(query.getGroupBy().getGroups().toArray(new String[0])), 0);
    }

    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_END, "query", "[query:" + query + "]"));
    return result;
}

From source file:com.thinkbiganalytics.metadata.jobrepo.nifi.provenance.NifiStatsJmsReceiver.java

private void assignNiFiBulletinErrors(List<JpaNifiFeedProcessorStats> stats) {

    //might need to query with the 'after' parameter

    //group the FeedStats by processorId_flowfileId

    Map<String, Map<String, List<JpaNifiFeedProcessorStats>>> processorFlowFilesStats = stats.stream()
            .filter(s -> s.getProcessorId() != null)
            .collect(Collectors.groupingBy(NifiFeedProcessorStats::getProcessorId,
                    Collectors.groupingBy(NifiFeedProcessorStats::getLatestFlowFileId)));

    Set<String> processorIds = processorFlowFilesStats.keySet();
    //strip out those processorIds that are part of a reusable flow
    Set<String> nonReusableFlowProcessorIds = processorIds.stream()
            .filter(processorId -> !provenanceEventFeedUtil.isReusableFlowProcessor(processorId))
            .collect(Collectors.toSet());

    //find all errors for the processors
    List<BulletinDTO> errors = nifiBulletinExceptionExtractor.getErrorBulletinsForProcessorId(processorIds,
            lastBulletinId);

    if (errors != null && !errors.isEmpty()) {
        Set<JpaNifiFeedProcessorStats> statsToUpdate = new HashSet<>();
        // first look for matching feed flow and processor ids.  otherwise look for processor id matches that are not part of reusable flows
        errors.stream().forEach(b -> {
            stats.stream().forEach(stat -> {
                if (stat.getLatestFlowFileId() != null
                        && b.getSourceId().equalsIgnoreCase(stat.getProcessorId())
                        && b.getMessage().contains(stat.getLatestFlowFileId())) {
                    stat.setErrorMessageTimestamp(getAdjustBulletinDateTime(b));
                    stat.setErrorMessages(b.getMessage());
                    addFeedProcessorError(stat);
                    statsToUpdate.add(stat);
                } else if (nonReusableFlowProcessorIds.contains(b.getSourceId())
                        && b.getSourceId().equalsIgnoreCase(stat.getProcessorId())) {
                    stat.setErrorMessageTimestamp(getAdjustBulletinDateTime(b));
                    stat.setErrorMessages(b.getMessage());
                    addFeedProcessorError(stat);
                    statsToUpdate.add(stat);
                }
            });
        });
        lastBulletinId = errors.stream().mapToLong(b -> b.getId()).max().getAsLong();

        if (!statsToUpdate.isEmpty()) {
            notifyClusterOfFeedProcessorErrors(statsToUpdate);
            if (persistErrors) {
                nifiEventStatisticsProvider.save(new ArrayList<>(statsToUpdate));
            }
        }
    }
}

From source file:com.github.horrorho.liquiddonkey.cloud.Looter.java

void snapshot(HttpClient client, Core core, HttpAgent agent, Backup backup, int id)
        throws BadDataException, IOException, InterruptedException {

    boolean toReport = config.debug().toReport();
    Path path = config.file().base().resolve(backup.backupUDID()).resolve(config.file().reportsDirectory());
    Predicate<ICloud.MBSFile> nonUndecryptableFilter = file -> !file.getAttributes().hasEncryptionKey()
            || backup.keyBagManager().fileKey(file) != null;

    // Retrieve file list.
    int limit = config.client().listLimit();
    Snapshot snapshot = agent
            .execute((c, mmeAuthToken) -> Snapshots.from(c, core, mmeAuthToken, backup, id, limit));

    if (snapshot == null) {
        logger.warn("-- snapshot() > snapshot not found: {}", id);
        return;
    }
    ICloud.MBSSnapshotAttributes attr = snapshot.mbsSnapshot().getAttributes();
    logger.info("-- snapshot() > files: {}", snapshot.filesCount());
    std.println();
    std.println(
            "Retrieving snapshot: " + id + " (" + attr.getDeviceName() + " " + attr.getProductVersion() + ")");

    // Total files.
    std.println("Files(total): " + snapshot.filesCount());
    if (toReport) {
        csvWriter.files(sorted(snapshot), path.resolve("snapshot_" + id + "_files.csv"));
    }

    // Mode summary.
    Map<Mode, Long> modes = snapshot.files().stream()
            .collect(Collectors.groupingBy(Mode::mode, Collectors.counting()));
    logger.info("-- snapshot() > modes: {}", modes);

    // Non-empty files filter.
    snapshot = Snapshots.from(snapshot, file -> file.getSize() != 0 && file.hasSignature());
    logger.info("-- snapshot() > filtered non empty, remaining: {}", snapshot.filesCount());
    std.println("Files(non-empty): " + snapshot.filesCount());

    // User filter
    snapshot = Snapshots.from(snapshot, filter);
    logger.info("-- snapshot() > filtered configured, remaining: {}", snapshot.filesCount());
    std.println("Files(filtered): " + snapshot.filesCount());
    if (toReport) {
        csvWriter.files(sorted(snapshot), path.resolve("snapshot_" + id + "_filtered.csv"));
    }

    // Undecryptable filter
    Snapshot undecryptable = Snapshots.from(snapshot, nonUndecryptableFilter.negate());
    snapshot = Snapshots.from(snapshot, nonUndecryptableFilter);
    logger.info("-- snapshot() > filtered undecryptable, remaining: {}", snapshot.filesCount());
    std.println("Files(non-undecryptable): " + snapshot.filesCount());

    // Dump undecryptables
    //        Map<ICloud.MBSFile, Outcome> undecryptableOutcomes = undecryptables.stream()
    //                .collect(Collectors.toMap(Function.identity(), file -> Outcome.FAILED_DECRYPT_NO_KEY));
    //        outcomesConsumer.accept(undecryptableOutcomes);
    if (toReport) {
        csvWriter.files(sorted(undecryptable), path.resolve("snapshot_" + id + "_undecryptable.csv"));
    }

    // Local filter
    if (config.engine().toForceOverwrite()) {
        logger.debug("-- snapshot() > forced overwrite");
    } else {
        long a = System.currentTimeMillis();
        snapshot = LocalFileFilter.from(snapshot, config.file()).apply(snapshot);
        long b = System.currentTimeMillis();
        logger.info("-- snapshot() > filtered local, remaining: {} delay(ms): {}", snapshot.filesCount(),
                b - a);
        std.println("Files(non-local): " + snapshot.filesCount());
    }

    if (snapshot.filesCount() == 0) {
        return;
    }

    // Retrieve
    Outcomes outcomes = Outcomes.create();
    OutcomesProgressPercentage progress = OutcomesProgressPercentage.from(snapshot, std);
    Consumer<Map<ICloud.MBSFile, Outcome>> outcomesConsumer = outcomes.andThen(progress);
    std.println();
    std.println("Retrieving: " + Bytes.humanize(progress.totalBytes()));

    // Fetch files
    SnapshotDownloader.from(config.engine(), config.file()).download(agent, core, snapshot, outcomesConsumer);

    std.println();
    std.println("Completed:");
    outcomes.print(std);
    std.println();
}

From source file:com.mycompany.wolf.Room.java

private void notifyWitchSave() {
    ScheduledFuture[] holder = new ScheduledFuture[1];
    holder[0] = scheduledExecutorService.schedule(() -> {
        holder[0].cancel(true);
        notifyHunterKillIfDead();
    }, WITCH_SAVE_DURATION, TimeUnit.MILLISECONDS);

    /*
     * Tally the wolves' votes and decide who is killed tonight.
     */
    List<Map.Entry<String, Long>> top2 = wolfVotings.values().stream()
            .collect(Collectors.groupingBy(wolfVoting -> wolfVoting.playerId, Collectors.counting())).entrySet()
            .stream()
            .sorted(Comparator.comparingLong((Map.Entry<String, Long> entry) -> entry.getValue()).reversed())
            .limit(2).collect(Collectors.toList());
    if (top2.size() == 1 || (top2.size() > 1 && top2.get(0).getValue().compareTo(top2.get(1).getValue()) > 0)) {
        theVoted = top2.get(0).getKey();
    } else if (top2.size() > 1) {
        theVoted = top2.get(0).getKey();
    } else {
        List<String> undead = sessions.stream()
                .filter(session -> !WOLF.equals(session.getUserProperties().get("role")))
                .map(session -> getPlayerId(session)).filter(((Predicate<String>) dead::contains).negate())
                .collect(Collectors.toList());
        theVoted = undead.get(new Random().nextInt(undead.size()));
    }
    newlyDead.add(theVoted);

    /*
     * Players poisoned by the witch also die tonight.
     */
    witchPoisonings.values().stream().map(witchPoisoning -> witchPoisoning.playerId).forEach(newlyDead::add);

    Map<String, Object> notifyWolfVoted = ImmutableMap.of("code", "notifyWolfVoted", "properties",
            ImmutableMap.of("playerId", theVoted));
    String jsonText = JsonUtils.toString(notifyWolfVoted);
    sessions.stream().forEach(s -> {
        s.getAsyncRemote().sendText(jsonText);
    });
}

From source file:nu.yona.server.analysis.service.ActivityService.java

private Map<LocalDate, Set<WeekActivity>> getWeekActivitiesGroupedByDate(UUID userAnonymizedId,
        Interval interval) {
    Set<WeekActivity> weekActivityEntities = weekActivityRepository.findAll(userAnonymizedId,
            interval.startDate, interval.endDate);
    return weekActivityEntities.stream()
            .collect(Collectors.groupingBy(IntervalActivity::getStartDate, Collectors.toSet()));
}

From source file:com.uber.hoodie.index.bloom.TestHoodieBloomIndex.java

@Test
public void testRangePruning() {

    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
    HoodieBloomIndex index = new HoodieBloomIndex(config);

    final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
    partitionToFileIndexInfo.put("2017/10/22",
            Arrays.asList(new BloomIndexFileInfo("f1"), new BloomIndexFileInfo("f2", "000", "000"),
                    new BloomIndexFileInfo("f3", "001", "003"), new BloomIndexFileInfo("f4", "002", "007"),
                    new BloomIndexFileInfo("f5", "009", "010")));

    JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc
            .parallelize(Arrays.asList(new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"),
                    new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/22", "004")))
            .mapToPair(t -> t);

    List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index
            .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();

    assertEquals(10, comparisonKeyList.size());
    Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
            .collect(Collectors.groupingBy(t -> t._2()._2().getRecordKey(),
                    Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));

    assertEquals(4, recordKeyToFileComps.size());
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002"));
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("005"));
}

From source file:io.dropwizard.sharding.dao.LookupDao.java

/**
 * Queries across various shards and returns the results.
 * <b>Note:</b> This method runs the query serially and is more efficient than scatterGather or a serial get of each key
 * @param keys The list of lookup keys
 * @return List of elements or empty if none match
 */
public List<T> get(List<String> keys) {
    Map<Integer, List<String>> lookupKeysGroupByShards = keys.stream().collect(Collectors.groupingBy(
            key -> ShardCalculator.shardId(shardManager, bucketIdExtractor, key), Collectors.toList()));

    return lookupKeysGroupByShards.keySet().stream().map(shardId -> {
        try {
            DetachedCriteria criteria = DetachedCriteria.forClass(entityClass)
                    .add(Restrictions.in(keyField.getName(), lookupKeysGroupByShards.get(shardId)));
            return Transactions.execute(daos.get(shardId).sessionFactory, true, daos.get(shardId)::select,
                    criteria);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }).flatMap(Collection::stream).collect(Collectors.toList());
}

From source file:com.netflix.spinnaker.clouddriver.titus.client.RegionScopedTitusClient.java

@Override
public Map<String, List<String>> getTaskIdsForJobIds() {
    String filterByStates = "Launched,StartInitiated,Started";

    TaskQuery.Builder taskQueryBuilder = TaskQuery.newBuilder();
    taskQueryBuilder.putFilteringCriteria("attributes", "source:spinnaker")
            .putFilteringCriteria("taskStates", filterByStates).addFields("id").addFields("jobId");

    List<com.netflix.titus.grpc.protogen.Task> grpcTasks = getTasksWithFilter(taskQueryBuilder, 10000);
    return grpcTasks.stream().collect(Collectors.groupingBy(com.netflix.titus.grpc.protogen.Task::getJobId,
            mapping(com.netflix.titus.grpc.protogen.Task::getId, toList())));
}

From source file:com.ikanow.aleph2.graph.titan.utils.TitanGraphBuildingUtils.java

/** Utility to get the vertices in the DB matching the specified keys TODO: move to intermediate utils  
 * @param keys
 * @param bucket_filter
 * @return
 */
@SuppressWarnings("unchecked")
public static final Map<JsonNode, List<Vertex>> getGroupedVertices(final Collection<ObjectNode> keys,
        final TitanTransaction tx, final List<String> key_fields, final Predicate<Vertex> vertex_filter) {
    final Stream<TitanVertex> dups = Lambdas.get(() -> {
        final Map<String, Set<Object>> dedup_query_builder = keys.stream()
                .flatMap(j -> Optionals.streamOf(j.fields(), false))
                .collect(Collectors.groupingBy(kv -> kv.getKey(),
                        Collectors.mapping(kv -> jsonNodeToObject(kv.getValue()), Collectors.toSet())));

        //TODO (ALEPH-15): would be nice to support custom "fuzzier" queries, since we're doing a dedup stage to pick the actual winning vertices anyway
        // that way you could say query on tokenized-version of name and get anyone with the same first or last name (say) and then pick the most likely
        // one based on the graph ... of course you'd probably want the full graph for that, so it might end up being better served as a "self-analytic" to do as part
        // of post processing?
        // (NOTE: same remarks apply for edges)
        // (NOTE: currently I've been going in the opposite direction, ie enforcing only one vertex per keyset per bucket ... otherwise it's going to get really 
        //  confusing when you try to merge all the different versions that Titan creates because of the lack of an upsert function....)

        final TitanGraphQuery<?> matching_nodes_query = dedup_query_builder.entrySet().stream().reduce(
                tx.query(), (query, kv) -> query.has(kv.getKey(), Contain.IN, kv.getValue()),
                (query1, query2) -> query1 // (can't occur since reduce not parallel)
        );

        return Optionals.streamOf(matching_nodes_query.vertices(), false);
    });

    // Remove false positives, un-authorized nodes, and group by key

    final Map<JsonNode, List<Vertex>> grouped_vertices = dups
            .map(vertex -> Tuples._2T((Vertex) vertex, getElementProperties(vertex, key_fields)))
            .filter(vertex_key -> keys.contains(vertex_key._2())) // (remove false positives)
            .filter(vertex_key -> vertex_filter.test(vertex_key._1())) // (remove un-authorized nodes)
            .collect(Collectors.groupingBy(t2 -> (JsonNode) t2._2(), // (group by key)
                    Collectors.mapping(t2 -> t2._1(), Collectors.toList())));

    return grouped_vertices;
}