co.cask.cdap.data2.metadata.lineage.LineageCollapser.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.data2.metadata.lineage.LineageCollapser.java

Source

/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.data2.metadata.lineage;

import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.metadata.lineage.CollapseType;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import org.apache.twill.api.RunId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Static helper that groups {@link Relation Relations} sharing the same non-collapsed fields and
 * merges every group into one {@link CollapsedRelation}, as directed by a set of {@link CollapseType}s.
 */
public final class LineageCollapser {
    private static final Logger LOG = LoggerFactory.getLogger(LineageCollapser.class);

    private LineageCollapser() {
        // static helpers only; never instantiated
    }

    /**
     * Collapses {@link Relation}s according to the requested {@link CollapseType}s.
     *
     * <p>Relations are bucketed by data, program, and whichever of access, run and components is
     * <em>not</em> listed in {@code collapseTypes}. Each bucket produces a single
     * {@link CollapsedRelation} holding the union of the accesses, runs and components found in it.
     *
     * @param relations lineage relations
     * @param collapseTypes fields to collapse relations on
     * @return collapsed relations
     */
    public static Set<CollapsedRelation> collapseRelations(Iterable<Relation> relations,
            Set<CollapseType> collapseTypes) {
        Multimap<CollapseKey, Relation> buckets = HashMultimap.create();
        for (Relation relation : relations) {
            buckets.put(getCollapseKey(relation, collapseTypes), relation);
        }

        Map<CollapseKey, Collection<Relation>> groups = buckets.asMap();
        LOG.trace("Collapsed relations: {}", groups);

        Set<CollapsedRelation> result = new HashSet<>();
        for (Map.Entry<CollapseKey, Collection<Relation>> group : groups.entrySet()) {
            // Union the per-relation values of everything that landed in this bucket.
            Set<AccessType> accesses = new HashSet<>();
            Set<RunId> runIds = new HashSet<>();
            Set<Id.NamespacedId> componentIds = new HashSet<>();
            for (Relation member : group.getValue()) {
                accesses.add(member.getAccess());
                runIds.add(member.getRun());
                componentIds.addAll(member.getComponents());
            }

            CollapseKey key = group.getKey();
            result.add(toCollapsedRelation(key.data, key.program, accesses, runIds, componentIds));
        }
        return result;
    }

    /**
     * Builds the grouping key for a relation. Data and program are always part of the key; access,
     * run and components are left {@code null} when the corresponding {@link CollapseType} is
     * requested, so relations differing only in those fields end up with equal keys.
     *
     * @param relation relation to derive the key from
     * @param collapseTypes fields that must not distinguish relations
     * @return the key under which {@code relation} is grouped
     */
    private static CollapseKey getCollapseKey(Relation relation, Set<CollapseType> collapseTypes) {
        Id.NamespacedId data = relation.getData();
        Id.Program program = relation.getProgram();
        AccessType access = collapseTypes.contains(CollapseType.ACCESS) ? null : relation.getAccess();
        RunId run = collapseTypes.contains(CollapseType.RUN) ? null : relation.getRun();
        Set<Id.NamespacedId> components =
                collapseTypes.contains(CollapseType.COMPONENT) ? null : relation.getComponents();
        return new CollapseKey(data, program, access, run, components);
    }

    /**
     * Creates a {@link CollapsedRelation} for the given data entity, choosing the constructor that
     * matches its concrete type.
     *
     * @param data dataset instance or stream the relation refers to
     * @param program program side of the relation
     * @param accesses merged access types
     * @param runs merged run ids
     * @param components merged components
     * @return the collapsed relation
     * @throws IllegalStateException if {@code data} is neither a dataset instance nor a stream
     */
    private static CollapsedRelation toCollapsedRelation(Id.NamespacedId data, Id.Program program,
            Set<AccessType> accesses, Set<RunId> runs, Set<Id.NamespacedId> components) {
        if (data instanceof Id.DatasetInstance) {
            return new CollapsedRelation((Id.DatasetInstance) data, program, accesses, runs, components);
        }
        // Not a dataset, so the only remaining acceptable type is a stream.
        Preconditions.checkState(data instanceof Id.Stream,
                "%s should be an instance of dataset or stream", data);
        return new CollapsedRelation((Id.Stream) data, program, accesses, runs, components);
    }

    /**
     * Value object used as the grouping key while collapsing. Two keys are equal when all five
     * fields are equal; a {@code null} field only matches another {@code null}.
     */
    private static final class CollapseKey {
        private final Id.NamespacedId data;
        private final Id.Program program;
        private final AccessType access;
        private final RunId run;
        private final Set<? extends Id.NamespacedId> components;

        public CollapseKey(Id.NamespacedId data, Id.Program program, AccessType access, RunId run,
                Set<? extends Id.NamespacedId> components) {
            this.data = data;
            this.program = program;
            this.access = access;
            this.run = run;
            this.components = components;
        }

        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            // The class is final, so instanceof is an exact-class test that also rejects null.
            if (!(o instanceof CollapseKey)) {
                return false;
            }
            CollapseKey other = (CollapseKey) o;
            if (!Objects.equals(data, other.data)) {
                return false;
            }
            if (!Objects.equals(program, other.program)) {
                return false;
            }
            if (!Objects.equals(access, other.access)) {
                return false;
            }
            if (!Objects.equals(run, other.run)) {
                return false;
            }
            return Objects.equals(components, other.components);
        }

        @Override
        public int hashCode() {
            return Objects.hash(data, program, access, run, components);
        }

        @Override
        public String toString() {
            return new StringBuilder("CollapseKey{")
                    .append("data=").append(data)
                    .append(", program=").append(program)
                    .append(", access=").append(access)
                    .append(", run=").append(run)
                    .append(", components=").append(components)
                    .append('}')
                    .toString();
        }
    }
}