gobblin.source.extractor.extract.AbstractSource.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.source.extractor.extract.AbstractSource.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.source.extractor.extract;

import java.util.List;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.SourceState;
import gobblin.configuration.WorkUnitState;
import gobblin.source.Source;
import gobblin.source.extractor.JobCommitPolicy;
import gobblin.source.extractor.WorkUnitRetryPolicy;
import gobblin.source.workunit.ExtractFactory;
import gobblin.source.workunit.WorkUnit;
import gobblin.source.workunit.Extract;
import gobblin.source.workunit.Extract.TableType;

/**
 * A base implementation of {@link gobblin.source.Source} that provides default behavior
 * for selecting previous work units to retry and for creating {@link Extract}s.
 *
 * @author Yinan Li
 */
public abstract class AbstractSource<S, D> implements Source<S, D> {

    // The string argument is passed straight to ExtractFactory; presumably a timestamp
    // pattern used when generating unique extracts -- TODO confirm against ExtractFactory.
    private final ExtractFactory extractFactory = new ExtractFactory("yyyyMMddHHmmss");

    /**
     * Get a list of {@link WorkUnitState}s of previous {@link WorkUnit}s subject for retries.
     *
     * <p>
     *     Two keys configure work unit retries. The first one specifies whether work
     *     unit retries are enabled or not. This is for individual jobs or a group of
     *     jobs that follow the same rule for work unit retries. The second, more
     *     advanced one specifies a retry policy. It is particularly useful as a global
     *     policy for a group of jobs that have different job commit policies and want
     *     work unit retries only for a specific job commit policy. The first key is
     *     probably sufficient for most jobs that only need a way to enable/disable
     *     work unit retries. The second one gives users more flexibility.
     * </p>
     *
     * <p>
     *     The retry policy (and, when relevant, the job commit policy) is evaluated
     *     before any previous work unit state is inspected or copied, so no copies are
     *     made when the result is going to be an empty list anyway.
     * </p>
     *
     * @param state Source state
     * @return list of {@link WorkUnitState}s of previous {@link WorkUnit}s subject for retries;
     *         an empty list if there are no previous work unit states, if retries are
     *         disabled, or if the retry policy does not match the job commit policy
     */
    protected List<WorkUnitState> getPreviousWorkUnitStatesForRetry(SourceState state) {
        if (Iterables.isEmpty(state.getPreviousWorkUnitStates())) {
            return ImmutableList.of();
        }

        WorkUnitRetryPolicy workUnitRetryPolicy = resolveWorkUnitRetryPolicy(state);
        if (workUnitRetryPolicy == WorkUnitRetryPolicy.NEVER) {
            return ImmutableList.of();
        }

        // The job commit policy is only consulted for the conditional retry policies;
        // ALWAYS retries regardless of how the job commits.
        if (workUnitRetryPolicy != WorkUnitRetryPolicy.ALWAYS
                && !retryPolicyMatchesJobCommitPolicy(workUnitRetryPolicy, state)) {
            // Return an empty list if job commit policy and work unit retry policy do not match
            return ImmutableList.of();
        }

        return collectUncommittedWorkUnitStates(state);
    }

    /**
     * Determine the effective work unit retry policy: the explicitly configured policy if
     * present, otherwise ALWAYS or NEVER depending on the retry-enabled flag (default true).
     */
    private WorkUnitRetryPolicy resolveWorkUnitRetryPolicy(SourceState state) {
        if (state.contains(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY)) {
            // Use the given work unit retry policy if specified
            return WorkUnitRetryPolicy.forName(state.getProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY));
        }

        // Otherwise set the retry policy based on if work unit retry is enabled
        boolean retryFailedWorkUnits = state.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_RETRY_ENABLED_KEY, true);
        return retryFailedWorkUnits ? WorkUnitRetryPolicy.ALWAYS : WorkUnitRetryPolicy.NEVER;
    }

    /**
     * Check whether a conditional retry policy agrees with the job commit policy configured in the state.
     */
    private boolean retryPolicyMatchesJobCommitPolicy(WorkUnitRetryPolicy workUnitRetryPolicy, SourceState state) {
        JobCommitPolicy jobCommitPolicy = JobCommitPolicy.forName(state
                .getProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, ConfigurationKeys.DEFAULT_JOB_COMMIT_POLICY));

        boolean partialSuccessMatch = workUnitRetryPolicy == WorkUnitRetryPolicy.ON_COMMIT_ON_PARTIAL_SUCCESS
                && jobCommitPolicy == JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS;
        boolean fullSuccessMatch = workUnitRetryPolicy == WorkUnitRetryPolicy.ON_COMMIT_ON_FULL_SUCCESS
                && jobCommitPolicy == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS;
        return partialSuccessMatch || fullSuccessMatch;
    }

    /**
     * Collect the previous work unit states that were not successfully committed, optionally
     * overlaying the current state's configuration on a copy of each one.
     */
    private List<WorkUnitState> collectUncommittedWorkUnitStates(SourceState state) {
        // Loop-invariant configuration lookup, read once up front.
        boolean overwriteConfigs = state.getPropAsBoolean(ConfigurationKeys.OVERWRITE_CONFIGS_IN_STATESTORE,
                ConfigurationKeys.DEFAULT_OVERWRITE_CONFIGS_IN_STATESTORE);

        List<WorkUnitState> uncommittedStates = Lists.newArrayList();
        for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) {
            if (workUnitState.getWorkingState() == WorkUnitState.WorkingState.COMMITTED) {
                continue;
            }

            if (overwriteConfigs) {
                // We need to make a copy here since getPreviousWorkUnitStates returns ImmutableWorkUnitStates
                // for which addAll is not supported
                WorkUnitState workUnitStateCopy = new WorkUnitState(workUnitState.getWorkunit(), state);
                workUnitStateCopy.addAll(workUnitState);
                workUnitStateCopy.overrideWith(state);
                uncommittedStates.add(workUnitStateCopy);
            } else {
                uncommittedStates.add(workUnitState);
            }
        }
        return uncommittedStates;
    }

    /**
     * Get a list of previous {@link WorkUnit}s subject for retries.
     *
     * <p>
     *     This method uses {@link AbstractSource#getPreviousWorkUnitStatesForRetry(SourceState)}
     *     and returns a copy of the {@link WorkUnit} behind each selected state.
     * </p>
     *
     * @param state Source state
     * @return list of previous {@link WorkUnit}s subject for retries
     */
    protected List<WorkUnit> getPreviousWorkUnitsForRetry(SourceState state) {
        List<WorkUnit> workUnits = Lists.newArrayList();
        for (WorkUnitState workUnitState : getPreviousWorkUnitStatesForRetry(state)) {
            // Make a copy here as getWorkunit() below returns an ImmutableWorkUnit
            workUnits.add(WorkUnit.copyOf(workUnitState.getWorkunit()));
        }

        return workUnits;
    }

    /**
     * Create an {@link Extract} by delegating to this source's {@link ExtractFactory#getUniqueExtract}.
     *
     * @param type table type of the extract
     * @param namespace namespace of the extract
     * @param table table name of the extract
     * @return the {@link Extract} returned by the factory
     */
    public Extract createExtract(TableType type, String namespace, String table) {
        return this.extractFactory.getUniqueExtract(type, namespace, table);
    }
}