Usage examples for java.util.stream.Collectors.mapping
public static <T, U, A, R> Collector<T, ?, R> mapping(Function<? super T, ? extends U> mapper, Collector<? super U, A, R> downstream)
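Collectors.mapping adapts a downstream collector by applying the mapper function to each input element before accumulation; it most often appears as the downstream collector of Collectors.groupingBy, which is the pattern in nearly all of the examples below. A minimal self-contained sketch (the data and class name are hypothetical, not taken from the examples that follow):

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class MappingSketch {
    public static void main(String[] args) {
        List<String> words = Arrays.asList("apple", "banana", "cherry", "fig");
        // Group words by length; mapping() converts each word to its first
        // character before the downstream toList() accumulates it.
        Map<Integer, List<Character>> firstCharsByLength = words.stream()
                .collect(Collectors.groupingBy(String::length,
                        Collectors.mapping(w -> w.charAt(0), Collectors.toList())));
        System.out.println(firstCharsByLength); // {3=[f], 5=[a], 6=[b, c]}
    }
}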
From source file:com.uber.hoodie.index.bloom.TestHoodieBloomIndex.java
@Test
public void testRangePruning() {
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
    HoodieBloomIndex index = new HoodieBloomIndex(config);
    final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
    partitionToFileIndexInfo.put("2017/10/22",
            Arrays.asList(new BloomIndexFileInfo("f1"), new BloomIndexFileInfo("f2", "000", "000"),
                    new BloomIndexFileInfo("f3", "001", "003"), new BloomIndexFileInfo("f4", "002", "007"),
                    new BloomIndexFileInfo("f5", "009", "010")));
    JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc
            .parallelize(Arrays.asList(new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"),
                    new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/22", "004")))
            .mapToPair(t -> t);
    List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index
            .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();
    assertEquals(10, comparisonKeyList.size());
    // Group the comparison tuples by record key, mapping each tuple to the
    // file-name portion of its comparison key.
    Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
            .collect(Collectors.groupingBy(t -> t._2()._2().getRecordKey(),
                    Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));
    assertEquals(4, recordKeyToFileComps.size());
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002"));
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("005"));
}
From source file:com.ikanow.aleph2.graph.titan.utils.TitanGraphBuildingUtils.java
/**
 * Utility to get the vertices in the DB matching the specified keys. TODO: move to intermediate utils
 *
 * @param keys the keys to match
 * @param tx the current Titan transaction
 * @param key_fields the fields that make up a key
 * @param vertex_filter predicate used to remove un-authorized vertices
 * @return the matching vertices, grouped by key
 */
@SuppressWarnings("unchecked")
public static final Map<JsonNode, List<Vertex>> getGroupedVertices(final Collection<ObjectNode> keys,
        final TitanTransaction tx, final List<String> key_fields, final Predicate<Vertex> vertex_filter) {
    final Stream<TitanVertex> dups = Lambdas.get(() -> {
        final Map<String, Set<Object>> dedup_query_builder = keys.stream()
                .flatMap(j -> Optionals.streamOf(j.fields(), false))
                .collect(Collectors.groupingBy(kv -> kv.getKey(),
                        Collectors.mapping(kv -> jsonNodeToObject(kv.getValue()), Collectors.toSet())));
        //TODO (ALEPH-15): would be nice to support custom "fuzzier" queries, since we're doing a dedup stage to pick the actual winning vertices anyway
        // that way you could say query on tokenized-version of name and get anyone with the same first or last name (say) and then pick the most likely
        // one based on the graph ... of course you'd probably want the full graph for that, so it might end up being better served as a "self-analytic" to do as part
        // of post processing?
        // (NOTE: same remarks apply for edges)
        // (NOTE: currently I've been going in the opposite direction, ie enforcing only one vertex per keyset per bucket ... otherwise it's going to get really
        // confusing when you try to merge all the different versions that Titan creates because of the lack of an upsert function....)
        final TitanGraphQuery<?> matching_nodes_query = dedup_query_builder.entrySet().stream().reduce(
                tx.query(),
                (query, kv) -> query.has(kv.getKey(), Contain.IN, kv.getValue()),
                (query1, query2) -> query1 // (can't occur since reduce not parallel)
        );
        return Optionals.streamOf(matching_nodes_query.vertices(), false);
    });
    // Remove false positives, un-authorized nodes, and group by key
    final Map<JsonNode, List<Vertex>> grouped_vertices = dups
            .map(vertex -> Tuples._2T((Vertex) vertex, getElementProperties(vertex, key_fields)))
            .filter(vertex_key -> keys.contains(vertex_key._2())) // (remove false positives)
            .filter(vertex_key -> vertex_filter.test(vertex_key._1())) // (remove un-authorized nodes)
            .collect(Collectors.groupingBy(t2 -> (JsonNode) t2._2(), // (group by key)
                    Collectors.mapping(t2 -> t2._1(), Collectors.toList())));
    return grouped_vertices;
}
From source file:org.dataconservancy.packaging.tool.integration.PackageGenerationTest.java
/**
 * Ensures the models from ModelResources are included in the final package.
 * Currently every model exposed by {@code ModelResources#RESOURCE_MAP}
 * should have a serialization in the final package under the ONT directory
 * per our spec.
 *
 * @throws Exception
 */
@Test
public void testOntologiesIncluded() throws Exception {
    PackageState state = initializer.initialize(DCS_PROFILE);
    OpenedPackage openedPackage = packager.createPackage(state, folder.getRoot());
    List<File> models = new ArrayList<>();
    OntDirectoryWalker walker = new OntDirectoryWalker();
    walker.doWalk(openedPackage.getBaseDirectory(), models);
    assertTrue(ModelResources.RESOURCE_MAP.size() > 0);
    assertEquals(ModelResources.RESOURCE_MAP.size(), models.size());
    // Collectors.mapping also works as a top-level collector: here it maps
    // each File to its name before collecting into a List.
    List<String> packageModelNames = models.stream()
            .collect(Collectors.mapping(File::getName, Collectors.toList()));
    ModelResources.RESOURCE_MAP.values().stream().forEach(resource -> {
        if (resource.startsWith("/")) {
            resource = resource.substring(1, resource.length());
        }
        assertTrue(packageModelNames.contains(resource));
    });
}
From source file:org.apache.hadoop.hbase.client.RawAsyncHBaseAdmin.java
@Override
public CompletableFuture<CacheEvictionStats> clearBlockCache(TableName tableName) {
    CompletableFuture<CacheEvictionStats> future = new CompletableFuture<>();
    addListener(getTableHRegionLocations(tableName), (locations, err) -> {
        if (err != null) {
            future.completeExceptionally(err);
            return;
        }
        Map<ServerName, List<RegionInfo>> regionInfoByServerName = locations.stream()
                .filter(l -> l.getRegion() != null).filter(l -> !l.getRegion().isOffline())
                .filter(l -> l.getServerName() != null)
                .collect(Collectors.groupingBy(l -> l.getServerName(),
                        Collectors.mapping(l -> l.getRegion(), Collectors.toList())));
        List<CompletableFuture<CacheEvictionStats>> futures = new ArrayList<>();
        CacheEvictionStatsAggregator aggregator = new CacheEvictionStatsAggregator();
        for (Map.Entry<ServerName, List<RegionInfo>> entry : regionInfoByServerName.entrySet()) {
            futures.add(clearBlockCache(entry.getKey(), entry.getValue()).whenComplete((stats, err2) -> {
                if (err2 != null) {
                    future.completeExceptionally(unwrapCompletionException(err2));
                } else {
                    aggregator.append(stats);
                }
            }));
        }
        addListener(CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()])),
                (ret, err3) -> {
                    if (err3 != null) {
                        future.completeExceptionally(unwrapCompletionException(err3));
                    } else {
                        future.complete(aggregator.sum());
                    }
                });
    });
    return future;
}
From source file:org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore.java
/**
 * Sums the snapshot sizes for each namespace.
 */
Map<String, Long> groupSnapshotSizesByNamespace(Multimap<TableName, SnapshotWithSize> snapshotsWithSize) {
    return snapshotsWithSize.entries().stream().collect(Collectors.groupingBy(
            // Convert TableName into the namespace string
            (e) -> e.getKey().getNamespaceAsString(),
            // Sum the values for namespace
            Collectors.mapping(Map.Entry::getValue, Collectors.summingLong((sws) -> sws.getSize()))));
}
From source file:org.apache.nifi.remote.PeerDescriptionModifier.java
public PeerDescriptionModifier(final NiFiProperties properties) {
    final Map<Tuple<String, String>, List<Tuple<String, String>>> routeDefinitions = properties
            .getPropertyKeys().stream().filter(propertyKey -> propertyKey.startsWith(PROPERTY_PREFIX))
            .map(propertyKey -> {
                final Matcher matcher = PROPERTY_REGEX.matcher(propertyKey);
                if (!matcher.matches()) {
                    throw new IllegalArgumentException(format(
                            "Found an invalid Site-to-Site route definition property '%s'."
                                    + " Routing property keys should be formatted as 'nifi.remote.route.{protocol}.{name}.{routingConfigName}'."
                                    + " Where {protocol} is 'raw' or 'http', and {routingConfigName} is 'when', 'hostname', 'port' or 'secure'.",
                            propertyKey));
                }
                return matcher;
            })
            .collect(Collectors.groupingBy(matcher -> new Tuple<>(matcher.group(1), matcher.group(2)),
                    Collectors.mapping(matcher -> new Tuple<>(matcher.group(3), matcher.group(0)),
                            Collectors.toList())));

    routes = routeDefinitions.entrySet().stream().map(routeDefinition -> {
        final Route route = new Route();
        // E.g. [raw, example1], [http, example2]
        final Tuple<String, String> protocolAndRoutingName = routeDefinition.getKey();
        route.protocol = SiteToSiteTransportProtocol.valueOf(protocolAndRoutingName.getKey().toUpperCase());
        route.name = protocolAndRoutingName.getValue();
        routeDefinition.getValue().forEach(routingConfigNameAndPropertyKey -> {
            final String routingConfigName = routingConfigNameAndPropertyKey.getKey();
            final String propertyKey = routingConfigNameAndPropertyKey.getValue();
            final String routingConfigValue = properties.getProperty(propertyKey);
            try {
                switch (routingConfigName) {
                case "when":
                    route.predicate = Query.prepare(routingConfigValue);
                    break;
                case "hostname":
                    route.hostname = Query.prepare(routingConfigValue);
                    break;
                case "port":
                    route.port = Query.prepare(routingConfigValue);
                    break;
                case "secure":
                    route.secure = Query.prepare(routingConfigValue);
                    break;
                }
            } catch (AttributeExpressionLanguageParsingException e) {
                throw new IllegalArgumentException(format(
                        "Failed to parse NiFi expression language configured"
                                + " for Site-to-Site routing property at '%s' due to '%s'",
                        propertyKey, e.getMessage()), e);
            }
        });
        return route;
    }).map(Route::validate).collect(Collectors.groupingBy(r -> r.protocol));
}
From source file:org.codelibs.fess.app.web.admin.backup.AdminBackupAction.java
public static Consumer<Writer> getSearchLogNdjsonWriteCall() {
    return writer -> {
        final SearchLogBhv bhv = ComponentUtil.getComponent(SearchLogBhv.class);
        bhv.selectCursor(cb -> {
            cb.query().matchAll();
            cb.query().addOrderBy_RequestedAt_Asc();
        }, entity -> {
            final StringBuilder buf = new StringBuilder();
            buf.append('{');
            appendJson("id", entity.getId(), buf).append(',');
            appendJson("query-id", entity.getQueryId(), buf).append(',');
            appendJson("user-info-id", entity.getUserInfoId(), buf).append(',');
            appendJson("user-session-id", entity.getUserSessionId(), buf).append(',');
            appendJson("user", entity.getUser(), buf).append(',');
            appendJson("search-word", entity.getSearchWord(), buf).append(',');
            appendJson("hit-count", entity.getHitCount(), buf).append(',');
            appendJson("query-page-size", entity.getQueryPageSize(), buf).append(',');
            appendJson("query-offset", entity.getQueryOffset(), buf).append(',');
            appendJson("referer", entity.getReferer(), buf).append(',');
            appendJson("languages", entity.getLanguages(), buf).append(',');
            appendJson("roles", entity.getRoles(), buf).append(',');
            appendJson("user-agent", entity.getUserAgent(), buf).append(',');
            appendJson("client-ip", entity.getClientIp(), buf).append(',');
            appendJson("access-type", entity.getAccessType(), buf).append(',');
            appendJson("query-time", entity.getQueryTime(), buf).append(',');
            appendJson("response-time", entity.getResponseTime(), buf).append(',');
            appendJson("requested-at", entity.getRequestedAt(), buf).append(',');
            final Map<String, List<String>> searchFieldMap = entity.getSearchFieldLogList().stream()
                    .collect(Collectors.groupingBy(Pair::getFirst,
                            Collectors.mapping(Pair::getSecond, Collectors.toList())));
            appendJson("search-field", searchFieldMap, buf);
            buf.append('}');
            buf.append('\n');
            try {
                writer.write(buf.toString());
            } catch (final IOException e) {
                throw new IORuntimeException(e);
            }
        });
    };
}
From source file:org.codice.alliance.nsili.common.ResultDAGConverter.java
private static Map<String, List<String>> getAttrMap(List<String> attributes) {
    return attributes.stream().map(ATTRIBUTE_PATTERN::matcher).filter(Matcher::matches)
            .collect(Collectors.groupingBy(m -> m.group(2),
                    Collectors.mapping(m -> m.group(3), Collectors.toList())));
}
From source file:org.codice.ddf.admin.application.service.migratable.FeatureProcessor.java
/**
 * Updates the specified features' requirements to mark them required or not.
 *
 * @param report the report where to record errors if unable to update the features
 * @param region the region where to update the features
 * @param jfeatures the features to update
 * @return <code>true</code> if the features were updated successfully; <code>false</code>
 *     otherwise
 */
public boolean updateFeaturesRequirements(ProfileMigrationReport report, String region,
        Set<JsonFeature> jfeatures) {
    return run(report, region, jfeatures.stream().map(JsonFeature::getId), Operation.UPDATE,
            jfeatures.stream()
                    .collect(Collectors.groupingBy(JsonFeature::isRequired,
                            Collectors.mapping(JsonFeature::toRequirement, Collectors.toSet())))
                    .entrySet().stream()
                    .map(requirementsToUpdate -> updateFeaturesRequirements(region, requirementsToUpdate))
                    .toArray(ThrowingRunnable[]::new));
}
From source file:org.gradoop.flink.model.impl.operators.matching.common.query.QueryHandler.java
/**
 * Initializes a cache for the given elements where every key maps to multiple elements.
 * The key selector will be called on every element to extract the cache key.
 * The value selector will be called on every element to extract the value.
 * Returns a cache of the form KT -> Set&lt;VT&gt;.
 *
 * @param elements elements the cache will be built from
 * @param keySelector key selector function extracting cache keys from elements
 * @param valueSelector value selector function extracting cache values from elements
 * @param <EL> the element type
 * @param <KT> the cache key type
 * @param <VT> the cache value type
 * @return cache KT -> Set&lt;VT&gt;
 */
private <EL, KT, VT> Map<KT, Set<VT>> initSetCache(Collection<EL> elements, Function<EL, KT> keySelector,
        Function<EL, VT> valueSelector) {
    return elements.stream()
            .collect(Collectors.groupingBy(keySelector, Collectors.mapping(valueSelector, Collectors.toSet())));
}