Usage examples for java.util.stream.Collectors.toMap
public static <T, K, U> Collector<T, ?, Map<K, U>> toMap(Function<? super T, ? extends K> keyMapper, Function<? super T, ? extends U> valueMapper, BinaryOperator<U> mergeFunction)
From source file:ddf.catalog.history.Historian.java
/**
 * Executes {@code filter} against the catalog provider and indexes the
 * resulting metacards by id.
 *
 * <p>The query starts at result 1, requests at most 250 results, and uses a
 * 10-second timeout. Null metacards are skipped; when two results share an
 * id they are merged via {@code Historian::firstInWinsMerge}.
 *
 * @param filter the query filter to execute
 * @return map of metacard id to metacard
 * @throws UnsupportedQueryException if the provider cannot execute the query
 */
private Map<String, Metacard> query(Filter filter) throws UnsupportedQueryException {
    QueryImpl queryImpl = new QueryImpl(filter, 1, 250, null, false, TimeUnit.SECONDS.toMillis(10));
    SourceResponse sourceResponse = catalogProvider().query(new QueryRequestImpl(queryImpl));
    return sourceResponse.getResults()
            .stream()
            .map(Result::getMetacard)
            .filter(Objects::nonNull)
            .collect(Collectors.toMap(Metacard::getId, Function.identity(), Historian::firstInWinsMerge));
}
From source file:ddf.catalog.history.Historian.java
/**
 * Resolves each read request to its stored content item and groups the
 * items by content id.
 *
 * <p>Requests that resolve to a null response or a null content item are
 * dropped. Items that share an id are accumulated into a single list.
 *
 * @param ids storage read requests to resolve
 * @return map of content id to all content items found for that id
 */
private Map<String, List<ContentItem>> getContent(Collection<ReadStorageRequest> ids) {
    return ids.stream()
            .map(this::getStorageItem)
            .filter(Objects::nonNull)
            .map(ReadStorageResponse::getContentItem)
            .filter(Objects::nonNull)
            .collect(Collectors.toMap(ContentItem::getId, Lists::newArrayList, (left, right) -> {
                left.addAll(right);
                return left;
            }));
}
From source file:com.ggvaidya.scinames.summary.NameStabilityView.java
/*
 * NOTE(review): this snippet was scraped with its original line structure
 * destroyed. Several of the line breaks below fall INSIDE string literals
 * (e.g. "... recognized / binomials. What?") and inside '//' comments, and
 * code following a '//' on a collapsed line is swallowed by that comment —
 * so this text does NOT compile as-is and is preserved byte-for-byte here.
 * Re-extract from the original NameStabilityView.java before compiling.
 *
 * Intent, as far as the visible code shows: builds a per-checklist stability
 * table (a Guava Table keyed by Dataset x column-name) covering name counts,
 * genera statistics, names/species added and deleted, and — behind the
 * NAME/CLUSTER/CIRCUMSCRIPTIONAL similarity flags — intersection/union
 * percentages of each checklist against the first, last, previous and next
 * checklists. Uses deprecated BigDecimal.ROUND_HALF_EVEN throughout —
 * presumably predates RoundingMode; worth modernizing at the real source.
 */
public void init() { Project project = projectView.getProject(); // What do we actually need to do? boolean flag_calculateNameSimilarity = (toCalculate & NAME_SIMILARITY) == NAME_SIMILARITY; boolean flag_calculateClusterSimilarity = (toCalculate & CLUSTER_SIMILARITY) == CLUSTER_SIMILARITY; boolean flag_calculateCircumscriptionSimilarity = (toCalculate & CIRCUMSCRIPTIONAL_SIMILARITY) == CIRCUMSCRIPTIONAL_SIMILARITY; // Setup stage. stage.setTitle("Name stability between " + project.getDatasets().size() + " datasets"); // Setup table. controller.getTableEditableProperty().set(false); //controller.setTableColumnResizeProperty(TableView.CONSTRAINED_RESIZE_POLICY); ObservableList<TableColumn> cols = controller.getTableColumnsProperty(); cols.clear();//from w w w . j av a2 s . c o m // Precalculating. Table<Dataset, String, String> precalc = HashBasedTable.create(); // Set up columns. cols.add(createTableColumnFromPrecalc(precalc, "dataset")); cols.add(createTableColumnFromPrecalc(precalc, "date")); cols.add(createTableColumnFromPrecalc(precalc, "year")); cols.add(createTableColumnFromPrecalc(precalc, "count_binomial")); cols.add(createTableColumnFromPrecalc(precalc, "count_genera")); cols.add(createTableColumnFromPrecalc(precalc, "count_monotypic_genera")); cols.add(createTableColumnFromPrecalc(precalc, "names_added")); //cols.add(createTableColumnFromPrecalc(precalc, "names_added_list")); cols.add(createTableColumnFromPrecalc(precalc, "names_deleted")); //cols.add(createTableColumnFromPrecalc(precalc, "names_deleted_list")); cols.add(createTableColumnFromPrecalc(precalc, "species_added")); //cols.add(createTableColumnFromPrecalc(precalc, "species_added_list")); cols.add(createTableColumnFromPrecalc(precalc, "species_deleted")); //cols.add(createTableColumnFromPrecalc(precalc, "species_deleted_list")); cols.add(createTableColumnFromPrecalc(precalc, "mean_binomials_per_genera")); cols.add(createTableColumnFromPrecalc(precalc, "median_binomials_per_genera")); 
cols.add(createTableColumnFromPrecalc(precalc, "mode_binomials_per_genera_list")); /* All them stability calculations */ if (flag_calculateNameSimilarity) { cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_prev_pc_prev")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_next_pc_next")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_first_pc_first")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "names_identical_to_last_pc_last")); } if (flag_calculateClusterSimilarity) { cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_prev_pc_prev")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next")); cols.add(createTableColumnFromPrecalc(precalc, 
"clusters_identical_to_next_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_next_pc_next")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_first_pc_first")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "clusters_identical_to_last_pc_last")); } if (flag_calculateCircumscriptionSimilarity) { cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_prev_pc_prev")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_next_pc_next")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_union")); 
cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_first_pc_first")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_this")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_union")); cols.add(createTableColumnFromPrecalc(precalc, "circumscriptions_identical_to_last_pc_last")); } Set<String> recognitionColumns = new HashSet<>(); // Calculate binomials per dataset. Map<Name, Set<Dataset>> datasetsPerName = new HashMap<>(); // Prepare to loop! List<Dataset> checklists = project.getChecklists(); // BIRD HACK! Include all datasets! // checklists = project.getDatasets(); // Set table items. We're only interested in checklists, because // there's no such thing as "name stability" between non-checklist datasets. controller.getTableItemsProperty().set(FXCollections.observableArrayList(checklists)); List<Dataset> prevChecklists = new LinkedList<>(); Dataset firstChecklist = checklists.get(0); Dataset lastChecklist = checklists.get(checklists.size() - 1); // TODO: This used to be prevDataset, but prevChecklist makes a lot more sense, since we // want to compare checklists with each other, ignoring datasets. Would be nice if someone // with copious free time could look over the calculations and make sure they don't assume // that the previous checklist is also the previous dataset? Dataset prevChecklist = null; int index = -1; for (Dataset ds : checklists) { index++; Dataset nextChecklist = (index < (checklists.size() - 1) ? 
checklists.get(index + 1) : null); precalc.put(ds, "dataset", ds.getName()); precalc.put(ds, "date", ds.getDate().asYYYYmmDD("-")); precalc.put(ds, "year", ds.getDate().getYearAsString()); Set<Name> recognizedBinomials = project.getRecognizedNames(ds).stream().flatMap(n -> n.asBinomial()) .collect(Collectors.toSet()); precalc.put(ds, "count_binomial", String.valueOf(recognizedBinomials.size())); Set<Name> recognizedGenera = recognizedBinomials.stream().flatMap(n -> n.asGenus()) .collect(Collectors.toSet()); precalc.put(ds, "count_genera", String.valueOf(recognizedGenera.size())); precalc.put(ds, "mean_binomials_per_genera", new BigDecimal(((double) recognizedBinomials.size()) / recognizedGenera.size()) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); Map<Name, List<Name>> countBinomialsPerGenus = recognizedBinomials.stream() // Eliminate names that have zero (or more than one?!) genus name. .filter(n -> (n.asGenus().count() == 1)) .collect(Collectors.groupingBy(n -> n.asGenus().findAny().get())); /* LOGGER.info("Debugging: list of " + recognizedGenera.size() + " genera: " + recognizedGenera.stream().map(n -> n.getFullName()).collect(Collectors.joining(", ")) ); */ precalc.put(ds, "count_monotypic_genera", String.valueOf(countBinomialsPerGenus.entrySet().stream() .filter(entry -> new HashSet<>(entry.getValue()).size() == 1).count())); /* LOGGER.info("Debugging: list of monotypic genera: " + countBinomialsPerGenus.entrySet().stream() .filter(entry -> new HashSet<>(entry.getValue()).size() == 1) .map(entry -> entry.getKey().getFullName()) .collect(Collectors.joining(", ")) ); */ // Species added and deleted Set<Name> namesAdded = ds.getChanges(project).filter(ch -> ch.getType().equals(ChangeType.ADDITION)) .flatMap(ch -> ch.getToStream()).collect(Collectors.toSet()); Set<Name> namesDeleted = ds.getChanges(project).filter(ch -> ch.getType().equals(ChangeType.DELETION)) .flatMap(ch -> ch.getFromStream()).collect(Collectors.toSet()); // TODO: This isn't so 
useful -- the more useful measure would be the number of all species added // and all species deleted, making sure there isn't a cluster-al overlap. precalc.put(ds, "names_added", String.valueOf(namesAdded.size())); //precalc.put(ds, "names_added_list", namesAdded.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", "))); precalc.put(ds, "names_deleted", String.valueOf(namesDeleted.size())); //precalc.put(ds, "names_deleted_list", namesDeleted.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", "))); // Eliminate names that have been added, but were previously recognized at the species level. Set<Name> speciesAdded = namesAdded; if (prevChecklist != null) { Set<Name> prevRecognizedNames = project.getNameClusterManager() .getClusters(project.getRecognizedNames(prevChecklist)).stream() .flatMap(nc -> nc.getNames().stream()).collect(Collectors.toSet()); speciesAdded = namesAdded.stream().filter(n -> !prevRecognizedNames.contains(n)) .collect(Collectors.toSet()); } // Eliminate names that are still represented in the checklist by a species cluster. // (Note that this includes cases where a subspecies is removed, but another subspecies // or the nominal species is still recognized!) 
Set<Name> currentlyRecognizedBinomialNames = project.getNameClusterManager() .getClusters(project.getRecognizedNames(ds)).stream().flatMap(nc -> nc.getNames().stream()) .flatMap(n -> n.asBinomial()).collect(Collectors.toSet()); Set<Name> speciesDeleted = namesDeleted.stream() .filter(n -> !n.asBinomial().anyMatch(bn -> currentlyRecognizedBinomialNames.contains(bn))) .collect(Collectors.toSet()); precalc.put(ds, "species_added", String.valueOf(speciesAdded.size())); precalc.put(ds, "species_added_list", speciesAdded.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", "))); precalc.put(ds, "species_deleted", String.valueOf(speciesDeleted.size())); precalc.put(ds, "species_deleted_list", speciesDeleted.stream().sorted().map(n -> n.getFullName()).collect(Collectors.joining(", "))); // Measures of species per genera java.util.Map<String, Set<Name>> binomialsPerGenera = recognizedBinomials.stream() .collect(Collectors.toMap(n -> n.getGenus(), n -> { Set<Name> set = new HashSet<Name>(); set.add(n); return set; }, (a, b) -> { a.addAll(b); return a; })); List<Integer> binomialsPerGeneraCounts = binomialsPerGenera.values().stream().map(set -> set.size()) .sorted().collect(Collectors.toList()); Frequency freq = new Frequency(); for (String genus : binomialsPerGenera.keySet()) { // Blech. 
for (Name binom : binomialsPerGenera.get(genus)) { freq.addValue(genus); } } List<Comparable<?>> modeGenera = freq.getMode(); precalc.put(ds, "mode_binomials_per_genera_list", modeGenera.stream().map(o -> o.toString() + ": " + freq.getCount(o) + " binomials") .collect(Collectors.joining("; "))); double[] binomialsPerGeneraCountsAsDouble = binomialsPerGeneraCounts.stream() .mapToDouble(Integer::doubleValue).toArray(); Median median = new Median(); precalc.put(ds, "median_binomials_per_genera", String.valueOf(median.evaluate(binomialsPerGeneraCountsAsDouble))); if (firstChecklist == null) { // precalc.put(ds, "names_identical_to_first", "NA"); // precalc.put(ds, "names_identical_to_first_pc", "NA"); } else { if (flag_calculateNameSimilarity) { precalc.put(ds, "names_identical_to_first", String.valueOf(getBinomialNamesIntersection(project, ds, firstChecklist).size())); precalc.put(ds, "names_identical_to_first_pc_this", new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size() / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN) .toPlainString()); precalc.put(ds, "names_identical_to_first_pc_union", new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size() / getBinomialNamesUnion(project, ds, firstChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "names_identical_to_first_pc_first", new BigDecimal((double) getBinomialNamesIntersection(project, ds, firstChecklist).size() / getBinomialNamesUnion(project, firstChecklist, firstChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateClusterSimilarity) { int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials) .size(); if (clustersForDataset != recognizedBinomials.size()) { throw new RuntimeException( "We have " + clustersForDataset + " clusters for this dataset, but " + recognizedBinomials.size() + " recognized 
binomials. What?"); } precalc.put(ds, "clusters_identical_to_first", String.valueOf(getBinomialClustersIntersection(project, ds, firstChecklist).size())); precalc.put(ds, "clusters_identical_to_first_pc_this", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, firstChecklist).size() / getBinomialClustersUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_first_pc_union", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, firstChecklist).size() / getBinomialClustersUnion(project, ds, firstChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_first_pc_first", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, firstChecklist).size() / getBinomialClustersUnion(project, firstChecklist, firstChecklist).size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateCircumscriptionSimilarity) { precalc.put(ds, "circumscriptions_identical_to_first", String .valueOf(getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size())); precalc.put(ds, "circumscriptions_identical_to_first_pc_this", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_first_pc_union", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, firstChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_first_pc_first", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, firstChecklist).size() / getBinomialTaxonConceptsUnion(project, firstChecklist, 
firstChecklist).size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } } if (lastChecklist == null) { // precalc.put(ds, "names_identical_to_first", "NA"); // precalc.put(ds, "names_identical_to_first_pc", "NA"); } else { if (flag_calculateNameSimilarity) { precalc.put(ds, "names_identical_to_last", String.valueOf(getBinomialNamesIntersection(project, ds, lastChecklist).size())); precalc.put(ds, "names_identical_to_last_pc_this", new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size() / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN) .toPlainString()); precalc.put(ds, "names_identical_to_last_pc_union", new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size() / getBinomialNamesUnion(project, ds, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "names_identical_to_last_pc_last", new BigDecimal((double) getBinomialNamesIntersection(project, ds, lastChecklist).size() / getBinomialNamesUnion(project, lastChecklist, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateClusterSimilarity) { int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials) .size(); if (clustersForDataset != recognizedBinomials.size()) { throw new RuntimeException( "We have " + clustersForDataset + " clusters for this dataset, but " + recognizedBinomials.size() + " recognized binomials. 
What?"); } precalc.put(ds, "clusters_identical_to_last", String.valueOf(getBinomialClustersIntersection(project, ds, lastChecklist).size())); precalc.put(ds, "clusters_identical_to_last_pc_this", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, lastChecklist).size() / getBinomialClustersUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_last_pc_union", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, lastChecklist).size() / getBinomialClustersUnion(project, ds, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_last_pc_last", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, lastChecklist).size() / getBinomialClustersUnion(project, lastChecklist, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateCircumscriptionSimilarity) { precalc.put(ds, "circumscriptions_identical_to_last", String .valueOf(getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size())); precalc.put(ds, "circumscriptions_identical_to_last_pc_this", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_last_pc_union", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, lastChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_last_pc_last", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, lastChecklist).size() / getBinomialTaxonConceptsUnion(project, lastChecklist, lastChecklist).size() * 100).setScale(2, 
BigDecimal.ROUND_HALF_EVEN).toPlainString()); } } if (prevChecklist == null) { // precalc.put(ds, "names_identical_to_prev", "NA"); // precalc.put(ds, "names_identical_to_prev_pc", "NA"); } else { if (flag_calculateNameSimilarity) { precalc.put(ds, "names_identical_to_prev", String.valueOf(getBinomialNamesIntersection(project, ds, prevChecklist).size())); precalc.put(ds, "names_identical_to_prev_pc_this", new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size() / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN) .toPlainString()); precalc.put(ds, "names_identical_to_prev_pc_union", new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size() / getBinomialNamesUnion(project, ds, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "names_identical_to_prev_pc_prev", new BigDecimal((double) getBinomialNamesIntersection(project, ds, prevChecklist).size() / getBinomialNamesUnion(project, prevChecklist, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateClusterSimilarity) { int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials) .size(); if (clustersForDataset != recognizedBinomials.size()) { throw new RuntimeException( "We have " + clustersForDataset + " clusters for this dataset, but " + recognizedBinomials.size() + " recognized binomials. 
What?"); } precalc.put(ds, "clusters_identical_to_prev", String.valueOf(getBinomialClustersIntersection(project, ds, prevChecklist).size())); precalc.put(ds, "clusters_identical_to_prev_pc_this", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, prevChecklist).size() / getBinomialClustersUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_prev_pc_union", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, prevChecklist).size() / getBinomialClustersUnion(project, ds, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_prev_pc_prev", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, prevChecklist).size() / getBinomialClustersUnion(project, prevChecklist, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateCircumscriptionSimilarity) { precalc.put(ds, "circumscriptions_identical_to_prev", String .valueOf(getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size())); precalc.put(ds, "circumscriptions_identical_to_prev_pc_this", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_prev_pc_union", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, prevChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_prev_pc_prev", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, prevChecklist).size() / getBinomialTaxonConceptsUnion(project, prevChecklist, prevChecklist).size() * 100).setScale(2, 
BigDecimal.ROUND_HALF_EVEN).toPlainString()); } // FYI, getBinomialTaxonConceptsUnion(project, ds, prevChecklist).size() should always be equal to the number of species in the dataset. } if (nextChecklist == null) { // precalc.put(ds, "names_identical_to_prev", "NA"); // precalc.put(ds, "names_identical_to_prev_pc", "NA"); } else { if (flag_calculateNameSimilarity) { precalc.put(ds, "names_identical_to_next", String.valueOf(getBinomialNamesIntersection(project, ds, nextChecklist).size())); precalc.put(ds, "names_identical_to_next_pc_this", new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size() / recognizedBinomials.size() * 100).setScale(2, BigDecimal.ROUND_HALF_EVEN) .toPlainString()); precalc.put(ds, "names_identical_to_next_pc_union", new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size() / getBinomialNamesUnion(project, ds, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "names_identical_to_next_pc_next", new BigDecimal((double) getBinomialNamesIntersection(project, ds, nextChecklist).size() / getBinomialNamesUnion(project, nextChecklist, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateClusterSimilarity) { int clustersForDataset = project.getNameClusterManager().getClusters(recognizedBinomials) .size(); if (clustersForDataset != recognizedBinomials.size()) { throw new RuntimeException( "We have " + clustersForDataset + " clusters for this dataset, but " + recognizedBinomials.size() + " recognized binomials. 
What?"); } precalc.put(ds, "clusters_identical_to_next", String.valueOf(getBinomialClustersIntersection(project, ds, nextChecklist).size())); precalc.put(ds, "clusters_identical_to_next_pc_this", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, nextChecklist).size() / getBinomialClustersUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_next_pc_union", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, nextChecklist).size() / getBinomialClustersUnion(project, ds, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "clusters_identical_to_next_pc_next", new BigDecimal( (double) getBinomialClustersIntersection(project, ds, nextChecklist).size() / getBinomialClustersUnion(project, nextChecklist, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); } if (flag_calculateCircumscriptionSimilarity) { precalc.put(ds, "circumscriptions_identical_to_next", String .valueOf(getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size())); precalc.put(ds, "circumscriptions_identical_to_next_pc_this", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, ds).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_next_pc_union", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size() / getBinomialTaxonConceptsUnion(project, ds, nextChecklist).size() * 100) .setScale(2, BigDecimal.ROUND_HALF_EVEN).toPlainString()); precalc.put(ds, "circumscriptions_identical_to_next_pc_next", new BigDecimal( (double) getBinomialTaxonConceptsIntersection(project, ds, nextChecklist).size() / getBinomialTaxonConceptsUnion(project, nextChecklist, nextChecklist).size() * 100).setScale(2, 
BigDecimal.ROUND_HALF_EVEN).toPlainString()); } } /* // For the visualization thingie. int total = prevChecklists.size(); List<Integer> counts = new LinkedList<>(); for(Name name: recognizedBinomials) { int prevRecognized = 0; if(!datasetsPerName.containsKey(name)) { datasetsPerName.put(name, new HashSet<>()); } else { prevRecognized = datasetsPerName.get(name).size(); } datasetsPerName.get(name).add(ds); counts.add( (int)( ((double)prevRecognized)/total*100 ) ); } Map<Integer, List<Integer>> countsByPercentage = counts.stream().sorted().collect(Collectors.groupingBy(n -> (int)(n/10)*10)); for(int percentage: countsByPercentage.keySet()) { precalc.put(ds, "previously_recognized_" + percentage + "pc", String.valueOf(countsByPercentage.get(percentage).size())); recognitionColumns.add("previously_recognized_" + percentage + "pc"); } prevChecklists.add(ds); */ // Set up the previous checklist for the next loop. prevChecklist = ds; } /* LinkedList<String> recognitionColumnsList = new LinkedList<>(recognitionColumns); recognitionColumnsList.sort(null); for(String colName: recognitionColumnsList) { cols.add(createTableColumnFromPrecalc(precalc, colName)); }*/ }
From source file:ddf.catalog.history.Historian.java
/**
 * Wraps each eligible metacard in a {@code MetacardVersionImpl} and indexes
 * the versions by the id of the metacard they version.
 *
 * <p>Metacards that are themselves versions, or that are deleted, are
 * filtered out. Duplicate version-of ids are merged via
 * {@code Historian::firstInWinsMerge}.
 *
 * @param metacards candidate metacards to version
 * @param action    maps a metacard id to the {@code Action} recorded on its version
 * @param subject   subject recorded on each created version
 * @return map of versioned-metacard id to the newly created version metacard
 */
private Map<String, Metacard> getVersionMetacards(Collection<Metacard> metacards,
        Function<String, Action> action, Subject subject) {
    return metacards.stream()
            .filter(MetacardVersionImpl::isNotVersion)
            .filter(DeletedMetacardImpl::isNotDeleted)
            .map(mc -> new MetacardVersionImpl(uuidGenerator.generateUuid(), mc,
                    action.apply(mc.getId()), subject))
            .collect(Collectors.toMap(MetacardVersionImpl::getVersionOfId, Function.identity(),
                    Historian::firstInWinsMerge));
}
From source file:com.thinkbiganalytics.feedmgr.nifi.CreateFeedBuilder.java
/** * update feed metadata to point to the valid controller services *//*from www. j a v a 2s .c o m*/ private void updateFeedMetadataControllerServiceReferences( List<NifiProperty> updatedControllerServiceProperties) { //map of the previous to new service values Map<String, String> controllerServiceChangeMap = updatedControllerServiceProperties.stream() .collect(Collectors.toMap(p -> p.getProcessorNameTypeKey(), p -> p.getValue(), (service1, service2) -> service1)); if (!updatedControllerServiceProperties.isEmpty()) { feedMetadata.getProperties().stream() .filter(property -> controllerServiceChangeMap.containsKey(property.getProcessorNameTypeKey())) .forEach((NifiProperty p) -> p .setValue(controllerServiceChangeMap.get(p.getProcessorNameTypeKey()))); } }