List of usage examples for java.util.stream Collectors groupingBy
public static <T, K, A, D> Collector<T, ?, Map<K, D>> groupingBy(Function<? super T, ? extends K> classifier, Collector<? super T, A, D> downstream)
From source file:org.talend.dataprep.schema.xls.XlsSchemaParser.java
/** * * * @param colId the column id.//from w w w.j ava2s. c o m * @param columnRows all rows with previously guessed type: key=row number, value= guessed type * @param averageHeaderSize * @return */ private Type guessColumnType(Integer colId, SortedMap<Integer, String> columnRows, int averageHeaderSize) { // calculate number per type Map<String, Long> perTypeNumber = columnRows.tailMap(averageHeaderSize).values() // .stream() // .collect(Collectors.groupingBy(w -> w, Collectors.counting())); OptionalLong maxOccurrence = perTypeNumber.values().stream().mapToLong(Long::longValue).max(); if (!maxOccurrence.isPresent()) { return ANY; } List<String> duplicatedMax = new ArrayList<>(); perTypeNumber.forEach((type1, aLong) -> { if (aLong >= maxOccurrence.getAsLong()) { duplicatedMax.add(type1); } }); String guessedType; if (duplicatedMax.size() == 1) { guessedType = duplicatedMax.get(0); } else { // as we have more than one type we guess ANY guessedType = ANY.getName(); } LOGGER.debug("guessed type for column #{} is {}", colId, guessedType); return Type.get(guessedType); }
From source file:org.trustedanalytics.storage.InfluxDataStore.java
/**
 * Queries InfluxDB for counts of {@code key} values in the given serie, bucketed by time interval.
 *
 * @param serieName        serie (measurement) to query.
 * @param groupingInterval time-bucket width passed to {@code time(...)} in the GROUP BY clause.
 * @param timeLimit        how far back from now() to query (e.g. "1h").
 * @return map of timestamp -> (class value -> count), or {@code null} when the query returned no series.
 */
private Map<Date, Map<Double, Double>> read(String serieName, String key, String groupingInterval,
        String timeLimit) {
    // NOTE(review): InfluxQL conventionally orders clauses as
    // "select ... from ... WHERE ... GROUP BY ..."; here GROUP BY precedes WHERE.
    // Confirm the InfluxDB version in use accepts this clause order.
    String query = String.format("select count(%s) from %s group by time(%s), %s where time > now() - %s", key,
            serieName, groupingInterval, key, timeLimit);
    LOG.debug(query);
    List<Serie> queryResult = store.query(properties.getDatabaseName(), query, TimeUnit.MILLISECONDS);
    LOG.debug("{} series read", queryResult.size());
    if (queryResult.isEmpty()) {
        // NOTE(review): returning null (rather than an empty map) forces every caller to
        // null-check — verify callers expect null before changing this.
        return null;
    }
    LOG.debug("{} rows read in first serie", queryResult.get(0).getRows().size());
    // Convert each raw row into a record, then group rows by timestamp using the
    // project-specific downstream collector.
    return queryResult.get(0).getRows().stream()
            .map(row -> new SpaceShuttleRecord((Double) row.get("time"), (Double) row.get("class"),
                    (Double) row.get("count")))
            .collect(Collectors.groupingBy(SpaceShuttleRecord::getTimestamp,
                    SpaceShuttleRecordCollector.collect()));
}
From source file:org.zanata.rest.service.LocalesService.java
@Transactional(readOnly = true) @Override//from w w w .j a v a 2s . c om public Response get(@QueryParam("filter") String filter, @QueryParam("sort") String fields, @DefaultValue("1") @QueryParam("page") int page, @DefaultValue("10") @QueryParam("sizePerPage") int sizePerPage) { List<HLocale> locales; int totalCount; int validatedPageSize = validatePageSize(sizePerPage); int offset = (validatePage(page) - 1) * validatedPageSize; boolean isAdmin = identity != null && identity.hasRole("admin"); if (isAdmin) { locales = localeServiceImpl.getAllLocales(offset, validatedPageSize, filter, convertToSortField(fields)); totalCount = localeServiceImpl.getLocalesTotalCount(filter); } else { locales = localeServiceImpl.getSupportedLocales(offset, validatedPageSize, filter, convertToSortField(fields)); totalCount = localeServiceImpl.getSupportedLocalesTotalCount(filter); } List<LanguageTeamSearchResult> localesRefs = locales.stream() .map(hLocale -> LocaleService.convertHLocaleToSearchResultDTO(hLocale)) .collect(Collectors.toList()); LocalesResults localesResults = new LocalesResults(totalCount, localesRefs); if (isAdmin) { List<LocaleId> localeIds = localeDAO.findAll().stream().map(hLocale -> hLocale.getLocaleId()) .collect(Collectors.toList()); // Map all requests to localeIds, to prevent multiple requests Map<LocaleId, Long> allRequests = requestService .getPendingLanguageRequests(localeIds.toArray(new LocaleId[localeIds.size()])).stream() .collect(Collectors.groupingBy(languageRequest -> languageRequest.getLocale().getLocaleId(), Collectors.counting())); for (LanguageTeamSearchResult searchResult : localesRefs) { searchResult.setRequestCount( firstNonNull(allRequests.get(searchResult.getLocaleDetails().getLocaleId()), 0L)); } } return Response.ok(localesResults).build(); }
From source file:picard.sam.markduplicates.UmiGraph.java
public UmiGraph(DuplicateSet set, String umiTag, String assignedUmiTag, boolean allowMissingUmis) { this.umiTag = umiTag; this.assignedUmiTag = assignedUmiTag; this.allowMissingUmis = allowMissingUmis; records = set.getRecords();//from ww w.j a v a 2s . c o m // First ensure that all the reads have a UMI, if any reads are missing a UMI throw an exception unless allowMissingUmis is true for (SAMRecord rec : records) { if (UmiUtil.getSanitizedUMI(rec, umiTag) == null) { if (allowMissingUmis) { rec.setAttribute(umiTag, ""); } else { throw new PicardException("Read " + rec.getReadName() + " does not contain a UMI with the " + umiTag + " attribute."); } } } // Count the number of times each UMI occurs umiCounts = records.stream() .collect(Collectors.groupingBy(p -> UmiUtil.getSanitizedUMI(p, umiTag), counting())); // At first we consider every UMI as if it were its own duplicate set numUmis = umiCounts.size(); umi = new String[numUmis]; duplicateSetID = IntStream.rangeClosed(0, numUmis - 1).toArray(); int i = 0; for (String key : umiCounts.keySet()) { umi[i] = key; i++; } }
From source file:picard.util.LiftoverUtils.java
/**
 * Normalizes and left aligns a {@link VariantContextBuilder}.
 * Note: this will modify the start/stop and alleles of this builder.
 * Also note: if the reference allele does not match the reference sequence, this method will throw an exception
 *
 * Based on Adrian Tan, Gonçalo R. Abecasis and Hyun Min Kang. (2015)
 * Unified Representation of Genetic Variants. Bioinformatics.
 */
protected static void leftAlignVariant(final VariantContextBuilder builder, final int start, final int end,
        final List<Allele> alleles, final ReferenceSequence referenceSequence) {

    // make sure that referenceAllele matches reference
    if (!referenceAlleleMatchesReferenceForIndel(alleles, referenceSequence, start, end)) {
        throw new IllegalArgumentException(String.format("Reference allele doesn't match reference at %s:%d-%d",
                referenceSequence.getName(), start, end));
    }

    boolean changesInAlleles = true;

    // Work on raw base arrays; Allele objects are rebuilt at the end.
    final Map<Allele, byte[]> alleleBasesMap = new HashMap<>();
    alleles.forEach(a -> alleleBasesMap.put(a, a.getBases()));

    int theStart = start;
    int theEnd = end;

    // 1. while changes in alleles do
    while (changesInAlleles) {

        changesInAlleles = false;

        // 2. if alleles end with the same nucleotide then
        // (grouping all final bases: a single group means every allele ends with the same base)
        if (alleleBasesMap.values().stream()
                .collect(Collectors.groupingBy(a -> a[a.length - 1], Collectors.toSet())).size() == 1
                && theEnd > 1) {
            // 3. truncate rightmost nucleotide of each allele
            for (final Allele allele : alleleBasesMap.keySet()) {
                alleleBasesMap.put(allele, truncateBase(alleleBasesMap.get(allele), true));
            }
            changesInAlleles = true;
            theEnd--;
            // 4. end if
        }

        // 5. if there exists an empty allele then
        if (alleleBasesMap.values().stream().map(a -> a.length).anyMatch(l -> l == 0)) {
            // 6. extend alleles 1 nucleotide to the left
            for (final Allele allele : alleleBasesMap.keySet()) {
                // the first -1 for zero-base (getBases) versus 1-based (variant position)
                // another -1 to get the base prior to the location of the start of the allele
                // NOTE(review): when theStart <= 1 the fallback reads the base at index theEnd —
                // confirm this is the intended behavior at the start of the contig.
                final byte extraBase = (theStart > 1) ? referenceSequence.getBases()[theStart - 2]
                        : referenceSequence.getBases()[theEnd];

                alleleBasesMap.put(allele, extendOneBase(alleleBasesMap.get(allele), extraBase));
            }
            changesInAlleles = true;
            theStart--;
            // 7. end if
        }
    }

    // 8. while leftmost nucleotide of each allele are the same and all alleles have length 2 or more do
    while (alleleBasesMap.values().stream().allMatch(a -> a.length >= 2)
            && alleleBasesMap.values().stream()
                    .collect(Collectors.groupingBy(a -> a[0], Collectors.toSet())).size() == 1) {

        // 9. truncate the leftmost base of the alleles
        for (final Allele allele : alleleBasesMap.keySet()) {
            alleleBasesMap.put(allele, truncateBase(alleleBasesMap.get(allele), false));
        }

        theStart++;
    }

    builder.start(theStart);
    builder.stop(theEnd);

    // Rebuild Allele objects from the adjusted base arrays, preserving each allele's reference flag.
    final Map<Allele, Allele> fixedAlleleMap = alleleBasesMap.entrySet().stream().collect(
            Collectors.toMap(Map.Entry::getKey, me -> Allele.create(me.getValue(), me.getKey().isReference())));

    //retain original order:
    List<Allele> fixedAlleles = alleles.stream().map(fixedAlleleMap::get).collect(Collectors.toList());

    builder.alleles(fixedAlleles);
}
From source file:scouterx.webapp.request.DictionaryRequest.java
/**
 * dictionary key list to find text from dictionary
 *
 * @param dictKeys - format : [service:10001,service:10002,obj:20001,sql:55555] (bracket is optional)
 */
@QueryParam("dictKeys")
public void setDictSets(String dictKeys) {
    Set<String> rawKeys = ZZ.splitParamStringSet(dictKeys);
    dictSets = rawKeys.stream()
            .map(rawKey -> {
                // Each key has the form "<textType>:<dictionary hash>".
                String[] tokens = StringUtils.split(rawKey, COLON);
                return new SDictionaryText(tokens[0], Integer.valueOf(tokens[1]), null);
            })
            .collect(Collectors.groupingBy(SDictionaryText::getTextType, Collectors.toSet()));
}
From source file:stroom.statistics.server.sql.SQLStatisticEventStore.java
/**
 * Stores the given statistic events, batching them by statistic name so each
 * named statistic is persisted in a single batch.
 */
@Override
public void putEvents(final List<StatisticEvent> statisticEvents) {
    // groupingBy(classifier) is specified as equivalent to groupingBy(classifier, toList()).
    statisticEvents.stream()
            .collect(Collectors.groupingBy(StatisticEvent::getName))
            .values()
            .forEach(this::putBatch);
}
From source file:uk.co.jassoft.markets.api.SentimentController.java
@PreAuthorize("permitAll") @RequestMapping(value = "company/{id}/period/{period}", method = RequestMethod.GET) public @ResponseBody List<SentimentByDate> getSentimentsByCompany(final HttpServletResponse response, @PathVariable String id, @PathVariable PeriodType period) { // This could do some mongoDB magic to only select the StorySentiments in the date List<StorySentiment> storySentiments = storySentimentRepository.findByCompany(id); List<SentimentByDate> companySentiments = storySentiments.stream() .sorted((s1, s2) -> s1.getStoryDate().compareTo(s2.getStoryDate())) .map(storySentiment -> new SentimentByDate(id, storySentiment.getStoryDate(), storySentiment.getEntitySentiment().stream() .collect(Collectors.summingInt(value -> value.getSentiment())))) .collect(Collectors.groupingBy( sentimentByDate -> getTrincatedDate(period, sentimentByDate.getDate()), Collectors.summingInt(value1 -> value1.getSentiment()))) .entrySet().stream()/*from ww w . j a v a2s .c o m*/ .map(dateIntegerEntry -> new SentimentByDate(id, dateIntegerEntry.getKey(), dateIntegerEntry.getValue())) .sorted((o1, o2) -> o1.getDate().compareTo(o2.getDate())).collect(Collectors.toList()); response.setHeader("Cache-Control", "max-age=" + CacheTimeout.FIFTEEN_MINUTES); return companySentiments; }
From source file:uk.co.jassoft.markets.api.SentimentController.java
@PreAuthorize("permitAll") @RequestMapping(value = "company/{id}", method = RequestMethod.GET) public @ResponseBody SentimentByDate getCurrentSentimentsByCompany(final HttpServletResponse response, @PathVariable String id) { // This could do some mongoDB magic to only select the StorySentiments in the date range List<StorySentiment> storySentiments = storySentimentRepository.findByCompany(id); List<SentimentByDate> companySentiments = storySentiments.stream().filter(isToday()) .sorted((s1, s2) -> s1.getStoryDate().compareTo(s2.getStoryDate())) .map(storySentiment -> new SentimentByDate(id, storySentiment.getStoryDate(), storySentiment.getEntitySentiment().stream() .collect(Collectors.summingInt(value -> value.getSentiment())))) .collect(Collectors.groupingBy( sentimentByDate -> getTrincatedDate(PeriodType.Day, sentimentByDate.getDate()), Collectors.summingInt(value1 -> value1.getSentiment()))) .entrySet().stream()// ww w.ja v a 2 s. c o m .map(dateIntegerEntry -> new SentimentByDate(id, dateIntegerEntry.getKey(), dateIntegerEntry.getValue())) .sorted((o1, o2) -> o1.getDate().compareTo(o2.getDate())).collect(Collectors.toList()); response.setHeader("Cache-Control", "max-age=" + CacheTimeout.FIFTEEN_MINUTES); return companySentiments.isEmpty() ? new SentimentByDate(id, new Date(), null) : companySentiments.get(0); }
From source file:uk.co.jassoft.markets.api.SentimentController.java
@PreAuthorize("permitAll") @RequestMapping(value = "{direction}/period/{period}/limit/{limit}", method = RequestMethod.GET) public @ResponseBody List<CompanySentiment> getChartToday(final HttpServletResponse response, @PathVariable String direction, @PathVariable PeriodType period, @PathVariable int limit) throws UnknownHostException { List<StorySentiment> storySentiments = storySentimentRepository .findByStoryDateGreaterThan(DateUtils.truncate(new Date(), Calendar.DATE)); List<CompanySentiment> todayCompanySentiments = storySentiments.stream() .map(storySentiment -> new ImmutablePair<>(storySentiment.getCompany(), storySentiment.getEntitySentiment().stream() .collect(Collectors.summingInt(value -> value.getSentiment())))) .collect(Collectors.groupingBy(pair -> pair.getKey(), Collectors.summingInt(value -> value.getValue()))) .entrySet().stream().map(stringIntegerEntry -> { Company company = companyRepository.findOne(stringIntegerEntry.getKey()); return new CompanySentiment(stringIntegerEntry.getKey(), company.getName(), stringIntegerEntry.getValue()); }).sorted((o1, o2) -> {//from w w w .j a va2 s . c o m switch (direction) { case "highest": return Integer.compare(o2.getSentiment(), o1.getSentiment()); case "lowest": default: return Integer.compare(o1.getSentiment(), o2.getSentiment()); } }).limit(limit).collect(Collectors.toList()); response.setHeader("Cache-Control", "max-age=" + CacheTimeout.FIFTEEN_MINUTES); return todayCompanySentiments; }