gobblin.data.management.retention.profile.MultiDatasetFinder.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.data.management.retention.profile.MultiDatasetFinder.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.data.management.retention.profile;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.List;
import java.util.Properties;

import lombok.extern.slf4j.Slf4j;

import org.apache.commons.lang3.reflect.ConstructorUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.typesafe.config.Config;

import gobblin.config.client.ConfigClient;
import gobblin.config.client.ConfigClientCache;
import gobblin.config.client.api.ConfigStoreFactoryDoesNotExistsException;
import gobblin.config.client.api.VersionStabilityPolicy;
import gobblin.config.store.api.ConfigStoreCreationException;
import gobblin.config.store.api.VersionDoesNotExistException;
import gobblin.dataset.Dataset;
import gobblin.dataset.DatasetsFinder;
import gobblin.util.reflection.GobblinConstructorUtils;

/**
 * A DatasetFinder that instantiates multiple DatasetFinders. {@link #findDatasets()} will return a union of all the
 * datasets found by each datasetFinder
 * <p>
 * Subclasses will specify the dataset finder class key name to instantiate. If {@link #datasetFinderClassKey()} is set
 * in jobProps, a single datasetFinder is created. Otherwise {@link #datasetFinderImportedByKey()} is used to find all
 * the importedBy {@link URI}s from gobblin config management. The {@link Config} for each {@link URI} should have a
 * {@link #datasetFinderClassKey()} set.
 * </p>
 *
 */
@Slf4j
public abstract class MultiDatasetFinder implements DatasetsFinder<Dataset> {
    private static final Splitter TAGS_SPLITTER = Splitter.on(",").omitEmptyStrings().trimResults();

    protected abstract String datasetFinderClassKey();

    protected abstract String datasetFinderImportedByKey();

    List<DatasetsFinder<Dataset>> datasetFinders;

    protected final Properties jobProps;

    @SuppressWarnings({ "rawtypes", "unchecked" })
    public MultiDatasetFinder(FileSystem fs, Properties jobProps) {
        this.jobProps = jobProps;
        try {
            this.datasetFinders = Lists.newArrayList();

            if (jobProps.containsKey(datasetFinderClassKey())) {
                try {
                    log.info(String.format("Instantiating datasetfinder %s ",
                            jobProps.getProperty(datasetFinderClassKey())));
                    this.datasetFinders.add((DatasetsFinder) ConstructorUtils.invokeConstructor(
                            Class.forName(jobProps.getProperty(datasetFinderClassKey())), fs, jobProps));
                } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException
                        | InstantiationException | ClassNotFoundException e) {
                    log.error(String.format("Retention ignored could not instantiate datasetfinder %s.",
                            jobProps.getProperty(datasetFinderClassKey())), e);
                    Throwables.propagate(e);
                }
            } else if (jobProps.containsKey(datasetFinderImportedByKey())) {

                log.info("Instatiating dataset finders using tag "
                        + jobProps.getProperty(datasetFinderImportedByKey()));

                ConfigClient client = ConfigClientCache.getClient(VersionStabilityPolicy.STRONG_LOCAL_STABILITY);
                Collection<URI> importedBys = Lists.newArrayList();

                for (String tag : TAGS_SPLITTER.split(jobProps.getProperty(datasetFinderImportedByKey()))) {
                    log.info("Looking for datasets that import tag " + tag);
                    importedBys.addAll(client.getImportedBy(new URI(tag), false));
                }

                for (URI importedBy : importedBys) {
                    Config datasetClassConfig = client.getConfig(importedBy);

                    try {
                        this.datasetFinders.add((DatasetsFinder) GobblinConstructorUtils.invokeFirstConstructor(
                                Class.forName(datasetClassConfig.getString(datasetFinderClassKey())),
                                ImmutableList.of(fs, jobProps, datasetClassConfig),
                                ImmutableList.of(fs, jobProps)));
                        log.info(String.format("Instantiated datasetfinder %s for %s.",
                                datasetClassConfig.getString(datasetFinderClassKey()), importedBy));
                    } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
                            | InvocationTargetException | NoSuchMethodException | SecurityException
                            | ClassNotFoundException e) {
                        log.error(String.format("Retention ignored for %s. Could not instantiate datasetfinder %s.",
                                importedBy, datasetClassConfig.getString(datasetFinderClassKey())), e);
                        Throwables.propagate(e);
                    }
                }
            } else {
                log.warn(String.format(
                        "NO DATASET_FINDERS FOUND. Either specify dataset finder class at %s or specify the imported tags at %s",
                        datasetFinderClassKey(), datasetFinderImportedByKey()));
            }

        } catch (IllegalArgumentException | VersionDoesNotExistException | ConfigStoreFactoryDoesNotExistsException
                | ConfigStoreCreationException | URISyntaxException e) {
            Throwables.propagate(e);
        }
    }

    @Override
    public List<Dataset> findDatasets() throws IOException {
        List<Dataset> datasets = Lists.newArrayList();
        for (DatasetsFinder<Dataset> df : this.datasetFinders) {
            datasets.addAll(df.findDatasets());
        }
        return datasets;
    }

    @Override
    public Path commonDatasetRoot() {
        throw new UnsupportedOperationException("There is no common dataset root for MultiDatasetFinder");
    }
}