org.apache.gobblin.data.management.retention.policy.CombineRetentionPolicy.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.gobblin.data.management.retention.policy.CombineRetentionPolicy.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.data.management.retention.policy;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import javax.annotation.Nullable;

import org.apache.commons.lang3.reflect.ConstructorUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import org.apache.gobblin.data.management.retention.DatasetCleaner;
import org.apache.gobblin.data.management.version.DatasetVersion;

/**
 * Implementation of {@link org.apache.gobblin.data.management.retention.policy.RetentionPolicy} that allows combining different
 * policies through a union or intersect operation. It will combine the delete sets from each sub-policy using the
 * specified operation.
 *
 * <p>
 * For example, if there are five versions of a dataset, a, b, c, d, e, policy1 would delete versions a, b, while
 * policy2 would delete versions b, c, using {@link CombineRetentionPolicy} will delete versions a, b, c if the
 * operation is UNION, or it will delete only version b if the operation is INTERSECT.
 * </p>
 *
 * <p>
 *   {@link CombineRetentionPolicy} expects the following configurations:
 * </p>
 * <ul>
 *   <li>gobblin.retention.combine.retention.policy.class.* : specifies the classes of the policies to combine. * can be
 *            any value, and each such configuration defines only one class.</li>
 *   <li>gobblin.retention.combine.retention.policy.delete.sets.combine.operation : operation used to combine delete
 *            sets. Can be UNION or INTERSECT.</li>
 * </ul>
 * <p>
 *   Additionally, any configuration necessary for combined policies must be specified.
 * </p>
 */
public class CombineRetentionPolicy<T extends DatasetVersion> implements RetentionPolicy<T> {

    /** Prefix of the configuration keys whose values name the policy classes to combine. */
    public static final String RETENTION_POLICIES_PREFIX = DatasetCleaner.CONFIGURATION_KEY_PREFIX
            + "combine.retention.policy.class.";
    /** Configuration key holding the name of the {@link DeletableCombineOperation} to apply. */
    public static final String DELETE_SETS_COMBINE_OPERATION = DatasetCleaner.CONFIGURATION_KEY_PREFIX
            + "combine.retention.policy.delete.sets.combine.operation";

    /** Set operation used to merge the delete sets produced by the embedded policies. */
    public enum DeletableCombineOperation {
        INTERSECT, UNION
    }

    private final List<RetentionPolicy<T>> retentionPolicies;
    private final DeletableCombineOperation combineOperation;

    /**
     * Creates a combined policy from already-built policies.
     *
     * @param retentionPolicies policies whose delete sets will be combined; the list is stored as given
     * @param combineOperation operation used to combine the delete sets
     */
    public CombineRetentionPolicy(List<RetentionPolicy<T>> retentionPolicies,
            DeletableCombineOperation combineOperation) {
        this.combineOperation = combineOperation;
        this.retentionPolicies = retentionPolicies;
    }

    /**
     * Creates a combined policy from configuration. Every property whose key starts with
     * {@link #RETENTION_POLICIES_PREFIX} names a policy class, which is instantiated through a constructor
     * accepting {@code props}.
     *
     * @param props configuration for this policy and for every embedded policy
     * @throws IllegalArgumentException if the combine operation is missing or not a valid
     *         {@link DeletableCombineOperation}, or if a policy class cannot be instantiated
     * @throws IOException if no policy class is configured
     */
    @SuppressWarnings("unchecked")
    public CombineRetentionPolicy(Properties props) throws IOException {
        // getProperty (unlike containsKey) also consults the defaults of props and only yields String values,
        // which matches how the policy classes are discovered through stringPropertyNames() below.
        String operationName = props.getProperty(DELETE_SETS_COMBINE_OPERATION);
        Preconditions.checkArgument(operationName != null, "Combine operation not specified.");

        ImmutableList.Builder<RetentionPolicy<T>> builder = ImmutableList.builder();

        for (String property : props.stringPropertyNames()) {
            if (property.startsWith(RETENTION_POLICIES_PREFIX)) {

                try {
                    builder.add((RetentionPolicy<T>) ConstructorUtils
                            .invokeConstructor(Class.forName(props.getProperty(property)), props));
                } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException
                        | InstantiationException | ClassNotFoundException e) {
                    throw new IllegalArgumentException(e);
                }
            }
        }

        this.retentionPolicies = builder.build();
        if (this.retentionPolicies.isEmpty()) {
            throw new IOException(
                    "No retention policies specified for " + CombineRetentionPolicy.class.getCanonicalName());
        }

        // Locale.ROOT keeps the case conversion independent of the JVM default locale (e.g. Turkish dotted I),
        // and trim() tolerates stray whitespace around the configured value.
        this.combineOperation = DeletableCombineOperation
                .valueOf(operationName.trim().toUpperCase(Locale.ROOT));

    }

    /**
     * Returns the most specific common superclass for the {@link #versionClass} of each embedded policy.
     * Requires at least one embedded policy.
     */
    @SuppressWarnings("unchecked")
    @Override
    public Class<T> versionClass() {
        Iterator<RetentionPolicy<T>> policies = this.retentionPolicies.iterator();
        // Seed with the first policy (same element get(0) would return), then fold in the remaining ones.
        Class<T> klazz = (Class<T>) this.retentionPolicies.get(0).versionClass();
        policies.next();
        while (policies.hasNext()) {
            klazz = commonSuperclass(klazz, (Class<T>) policies.next().versionClass());
        }
        return klazz;
    }

    /**
     * Asks every embedded policy for its deletable versions and combines the answers with the configured
     * {@link DeletableCombineOperation}.
     *
     * @param allVersions all versions of the dataset, passed unchanged to each embedded policy
     * @return the intersection or union of the delete sets of the embedded policies
     */
    @Override
    public Collection<T> listDeletableVersions(final List<T> allVersions) {

        List<Set<T>> candidateDeletableVersions = Lists.newArrayList(
                Iterables.transform(this.retentionPolicies, new Function<RetentionPolicy<T>, Set<T>>() {
                    @SuppressWarnings("deprecation")
                    @Nullable
                    @Override
                    public Set<T> apply(RetentionPolicy<T> input) {
                        return Sets.newHashSet(input.listDeletableVersions(allVersions));
                    }
                }));

        switch (this.combineOperation) {
        case INTERSECT:
            return intersectDatasetVersions(candidateDeletableVersions);
        case UNION:
            return unionDatasetVersions(candidateDeletableVersions);
        default:
            throw new RuntimeException("Combine operation " + this.combineOperation + " not recognized.");
        }

    }

    /**
     * Finds the closest type on the superclass chain of {@code classA} (starting at {@code classA} itself) that
     * {@code classB} can be assigned to.
     *
     * @param classA first class
     * @param classB second class
     * @return that type if it is a {@link DatasetVersion}; otherwise {@link DatasetVersion} itself
     */
    @VisibleForTesting
    @SuppressWarnings("unchecked")
    public Class<T> commonSuperclass(Class<T> classA, Class<T> classB) {

        if (classA.isAssignableFrom(classB)) {
            // a is superclass of b, so return class of a
            return classA;
        }
        // a is not superclass of b. Either b is superclass of a, or they are not in same branch
        // find closest superclass of a that is also a superclass of b
        Class<?> klazz = classA;
        // getSuperclass() returns null for interfaces (and for Object), so stop instead of dereferencing null
        while (klazz != null && !klazz.isAssignableFrom(classB)) {
            klazz = klazz.getSuperclass();
        }
        if (klazz != null && DatasetVersion.class.isAssignableFrom(klazz)) {
            return (Class<T>) klazz;
        }
        // reached when the superclass chain yields no DatasetVersion type shared by both classes
        // (e.g. classA is an interface, or the chains only meet at Object)
        return (Class<T>) DatasetVersion.class;
    }

    /**
     * Computes the intersection of all given sets.
     *
     * @param sets sets to intersect; they are not modified
     * @return a new set with the elements present in every input set, empty if {@code sets} is empty
     */
    private Set<T> intersectDatasetVersions(Collection<Set<T>> sets) {
        if (sets.isEmpty()) {
            return Sets.newHashSet();
        }
        Iterator<Set<T>> it = sets.iterator();
        // Materialize the result instead of stacking one lazy set view per policy
        Set<T> outputSet = Sets.newHashSet(it.next());
        while (it.hasNext()) {
            outputSet.retainAll(it.next());
        }
        return outputSet;
    }

    /**
     * Computes the union of all given sets.
     *
     * @param sets sets to merge; they are not modified
     * @return a new set with the elements present in at least one input set, empty if {@code sets} is empty
     */
    private Set<T> unionDatasetVersions(Collection<Set<T>> sets) {
        if (sets.isEmpty()) {
            return Sets.newHashSet();
        }
        Iterator<Set<T>> it = sets.iterator();
        // Materialize the result instead of stacking one lazy set view per policy
        Set<T> outputSet = Sets.newHashSet(it.next());
        while (it.hasNext()) {
            outputSet.addAll(it.next());
        }
        return outputSet;
    }
}