com.yahoo.bard.webservice.table.availability.MetricUnionAvailability.java Source code

Java tutorial

Introduction

Here is the source code for com.yahoo.bard.webservice.table.availability.MetricUnionAvailability.java

Source

// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.table.availability;

import com.yahoo.bard.webservice.data.config.names.TableName;
import com.yahoo.bard.webservice.data.metric.MetricColumn;
import com.yahoo.bard.webservice.table.Column;
import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.table.resolver.PhysicalDataSourceConstraint;
import com.yahoo.bard.webservice.util.SimplifiedIntervalList;
import com.yahoo.bard.webservice.util.Utils;

import com.google.common.collect.Sets;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.AbstractMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import javax.validation.constraints.NotNull;

/**
 * An implementation of {@link Availability} which describes a union of source availabilities, filtered by required
 * metrics and then intersected on time available for required columns.
 * <p>
 * For example, with three source availabilities with the following metric availability:
 * <pre>
 * {@code
 * Source Availability 1:
 * +---------------+
 * |  metric1      |
 * +---------------+
 * |  [2017/2018]  |
 * +---------------+
 *
 * Source Availability 2:
 * +------------------+
 * |  metric2         |
 * +------------------+
 * |  [2016/2017-03]  |
 * +------------------+
 *
 * Source Availability 3:
 * +-----------+
 * |  metric3  |
 * +-----------+
 * |  None     |
 * +-----------+
 * }
 * </pre>
 *
 * Then the available intervals for the following sets of metrics required by a constraint are:
 * <pre>
 * +----------------------+------------------+
 * |  Requested metrics   |  Available       |
 * +----------------------+------------------+
 * |  [metric1]           |  [2017/2018]     |
 * +----------------------+------------------+
 * |  [metric2]           |  [2016/2017-03]  |
 * +----------------------+------------------+
 * |  [metric1, metric2]  |  [2017/2017-03]  |
 * +----------------------+------------------+
 * |  [metric1, metric3]  |  []              |
 * +----------------------+------------------+
 * </pre>
 * When several sources contribute to a request, the result is the <em>intersection</em> of what each contributing
 * source reports, which is why <tt>[metric1, metric2]</tt> is only available where both sources overlap.
 */
public class MetricUnionAvailability extends BaseCompositeAvailability implements Availability {

    private static final Logger LOG = LoggerFactory.getLogger(MetricUnionAvailability.class);

    /** Names of all configured metric columns handed to the constructor. */
    private final Set<String> metricNames;

    /** For each source availability, the configured metric names that the source also reports intervals for. */
    private final Map<Availability, Set<String>> availabilitiesToMetricNames;

    /**
     * Constructor.
     *
     * @param physicalTables  A set of <tt>PhysicalTable</tt>s whose dimension schemas are (typically) the same and
     * Metric columns are unique(i.e. no overlap) on every table
     * @param columns  The set of all configured columns, including dimension columns, that metric union availability
     * will respond with
     *
     * @throws RuntimeException if the same configured metric name is found in more than one source availability
     */
    public MetricUnionAvailability(@NotNull Set<PhysicalTable> physicalTables, @NotNull Set<Column> columns) {
        super(physicalTables.stream().map(PhysicalTable::getAvailability));

        metricNames = Utils.getSubsetByType(columns, MetricColumn.class).stream()
                .map(MetricColumn::getName)
                .collect(Collectors.toSet());

        // Construct a map of availability to its assigned metrics
        // by intersecting its underlying datasource column names with the table configured metric names
        availabilitiesToMetricNames = physicalTables.stream()
                .map(PhysicalTable::getAvailability)
                .collect(Collectors.toMap(Function.identity(), availability -> Sets
                        .intersection(availability.getAllAvailableIntervals().keySet(), metricNames)));

        // Validate metric uniqueness: no configured metric may be served by more than one source
        if (!isMetricUnique(availabilitiesToMetricNames)) {
            String message = String.format(
                    "Metric columns must be unique across the metric union data sources, but duplicate was found "
                            + "across the following data sources: %s",
                    joinDataSourceNames());
            LOG.error(message);
            throw new RuntimeException(message);
        }
    }

    /**
     * Computes the intervals available for the metrics requested by the given constraint.
     * <p>
     * If the constraint asks for any metric that none of the source availabilities is mapped to, the result is
     * empty. Otherwise every source that shares at least one metric with the constraint is queried with a
     * sub-constraint, and the results are intersected. If no source shares a metric with the constraint, the result
     * is empty as well.
     *
     * @param constraint  The constraint whose metric names select the contributing source availabilities
     *
     * @return the intersection of the intervals reported by each contributing source, possibly empty
     */
    @Override
    public SimplifiedIntervalList getAvailableIntervals(PhysicalDataSourceConstraint constraint) {

        Set<String> dataSourceMetricNames = availabilitiesToMetricNames.values().stream()
                .flatMap(Set::stream)
                .collect(Collectors.toSet());

        // If the table is configured with a column that is not supported by the underlying data sources
        if (!constraint.getMetricNames().stream().allMatch(dataSourceMetricNames::contains)) {
            return new SimplifiedIntervalList();
        }

        return constructSubConstraint(constraint).entrySet().stream()
                .map(entry -> entry.getKey().getAvailableIntervals(entry.getValue()))
                .reduce(SimplifiedIntervalList::intersect)
                // Only build the empty fallback when there was nothing to reduce
                .orElseGet(SimplifiedIntervalList::new);
    }

    /**
     * Validates whether the metric columns are unique across each of the underlying datasource.
     *
     * @param availabilityToMetricNames  A map from <tt>Availability</tt> to set of metric names
     * contained in that <tt>Availability</tt>
     *
     * @return true if metric is unique across data sources, false otherwise
     */
    private static boolean isMetricUnique(Map<Availability, Set<String>> availabilityToMetricNames) {
        Set<String> seenMetrics = new HashSet<>();

        // Plain loops instead of a stream: the check depends on mutating seenMetrics, and stream predicates
        // are expected to be free of side effects
        for (Set<String> sourceMetrics : availabilityToMetricNames.values()) {
            for (String metric : sourceMetrics) {
                if (!seenMetrics.add(metric)) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Given a <tt>DataSourceConstraint</tt> - DSC1, construct a map from each availability, A, in this MetricUnion to
     * its <tt>DataSourceConstraint</tt>, DSC2.
     * <p>
     * DSC2 is obtained by calling <tt>withMetricIntersection</tt> on DSC1 with A's mapped metric names. There are
     * cases in which the resulting metric names are empty; this method filters out map entries that map to a
     * <tt>DataSourceConstraint</tt> with an empty set of metric names.
     *
     * @param constraint  The data constraint whose contained metric columns will be intersected with availabilities'
     * metric columns
     *
     * @return A map from <tt>Availability</tt> to <tt>DataSourceConstraint</tt> with non-empty metric names
     */
    private Map<Availability, PhysicalDataSourceConstraint> constructSubConstraint(
            PhysicalDataSourceConstraint constraint) {
        return availabilitiesToMetricNames.entrySet().stream()
                .map(entry -> new AbstractMap.SimpleEntry<>(entry.getKey(),
                        constraint.withMetricIntersection(entry.getValue())))
                .filter(entry -> !entry.getValue().getMetricNames().isEmpty())
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    }

    /**
     * Renders the names of the underlying data sources as a single comma-separated string.
     *
     * @return the data source names joined with ", "
     */
    private String joinDataSourceNames() {
        return getDataSourceNames().stream().map(TableName::asName).collect(Collectors.joining(", "));
    }

    @Override
    public String toString() {
        return String.format(
                "MetricUnionAvailability with data source names: [%s] and Configured metric columns: [%s]",
                joinDataSourceNames(),
                String.join(", ", metricNames));
    }
}