org.apache.falcon.oozie.process.ProcessExecutionWorkflowBuilder.java Source code


Introduction

Here is the source code for org.apache.falcon.oozie.process.ProcessExecutionWorkflowBuilder.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.oozie.process;

import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.LifeCycle;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.CatalogStorage;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.ProcessHelper;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.entity.v0.process.Workflow;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
import org.apache.falcon.oozie.workflow.ACTION;
import org.apache.falcon.oozie.workflow.CONFIGURATION;
import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
import org.apache.falcon.workflow.WorkflowExecutionContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.oozie.client.OozieClient;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;

/**
 * Base class for building the orchestration workflow for a process.
 */
public abstract class ProcessExecutionWorkflowBuilder extends OozieOrchestrationWorkflowBuilder<Process> {

    private static final Set<String> FALCON_PROCESS_HIVE_ACTIONS = new HashSet<String>(
            Arrays.asList(new String[] { PREPROCESS_ACTION_NAME, USER_ACTION_NAME, }));

    protected ProcessExecutionWorkflowBuilder(Process entity) {
        super(entity, LifeCycle.EXECUTION);
    }

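    /**
     * Assembles the Oozie workflow app for this process: an optional pre-processing
     * action, the engine-specific user action, and the success/fail post-processing
     * actions. The app and its default configuration are marshalled to buildPath,
     * and the resulting workflow properties are returned.
     */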
    @Override
    public Properties build(Cluster cluster, Path buildPath) throws FalconException {
        WORKFLOWAPP wfApp = new WORKFLOWAPP();
        String wfName = EntityUtil.getWorkflowName(Tag.DEFAULT, entity).toString();

        String startAction = USER_ACTION_NAME;
        final boolean isTableStorageType = EntityUtil.isTableStorageType(cluster, entity);

        //Add pre-processing action
        if (shouldPreProcess()) {
            ACTION preProcessAction = getPreProcessingAction(isTableStorageType, Tag.DEFAULT);
            addTransition(preProcessAction, USER_ACTION_NAME, FAIL_POSTPROCESS_ACTION_NAME);
            wfApp.getDecisionOrForkOrJoin().add(preProcessAction);
            startAction = PREPROCESS_ACTION_NAME;
        }

        //Add user action
        ACTION userAction = getUserAction(cluster, buildPath);
        addTransition(userAction, SUCCESS_POSTPROCESS_ACTION_NAME, FAIL_POSTPROCESS_ACTION_NAME);
        wfApp.getDecisionOrForkOrJoin().add(userAction);

        //Add post-processing
        ACTION success = getSuccessPostProcessAction();
        addTransition(success, OK_ACTION_NAME, FAIL_ACTION_NAME);
        wfApp.getDecisionOrForkOrJoin().add(success);

        ACTION fail = getFailPostProcessAction();
        addTransition(fail, FAIL_ACTION_NAME, FAIL_ACTION_NAME);
        wfApp.getDecisionOrForkOrJoin().add(fail);

        decorateWorkflow(wfApp, wfName, startAction);

        addLibExtensionsToWorkflow(cluster, wfApp, null);

        if (isTableStorageType) {
            setupHiveCredentials(cluster, buildPath, wfApp);
        }

        marshal(cluster, wfApp, buildPath);
        Properties props = createDefaultConfiguration(cluster);
        props.putAll(getProperties(buildPath, wfName));
        props.putAll(getWorkflowProperties());
        props.setProperty(OozieClient.APP_PATH, buildPath.toString());

        //Add libpath
        Path libPath = new Path(buildPath, "lib");
        copySharedLibs(cluster, libPath);
        props.put(OozieClient.LIBPATH, libPath.toString());

        Workflow processWorkflow = ((Process) entity).getWorkflow();
        propagateUserWorkflowProperties(processWorkflow, props);

        // Write out the config to config-default.xml
        marshal(cluster, wfApp, getConfig(props), buildPath);

        return props;
    }

    private Properties getWorkflowProperties() {
        Properties props = new Properties();
        props.setProperty("srcClusterName", "NA");
        props.setProperty("availabilityFlag", "NA");
        return props;
    }

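    /**
     * Builds the engine-specific user action (for example a pig, hive or oozie
     * sub-workflow action) that runs the user's workflow; implemented by the
     * concrete per-engine builders.
     */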
    protected abstract ACTION getUserAction(Cluster cluster, Path buildPath) throws FalconException;

    private void setupHiveCredentials(Cluster cluster, Path buildPath, WORKFLOWAPP wfApp) throws FalconException {
        // create hive-site.xml file so actions can use it in the classpath
        createHiveConfiguration(cluster, buildPath, ""); // DO NOT ADD PREFIX!!!

        if (isSecurityEnabled) {
            // add hcatalog credentials for secure mode and add a reference to each action
            addHCatalogCredentials(wfApp, cluster, HIVE_CREDENTIAL_NAME, FALCON_PROCESS_HIVE_ACTIONS);
        }
    }

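    /**
     * Adds one parameter per input feed. FileSystem inputs keep the plain feed name
     * for backwards compatibility; catalog (table) inputs get 'falcon_'-prefixed
     * properties plus an engine-specific partition filter parameter.
     */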
    protected void addInputFeedsAsParams(List<String> paramList, Cluster cluster) throws FalconException {
        if (entity.getInputs() == null) {
            return;
        }

        for (Input input : entity.getInputs().getInputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            final String inputName = input.getName();
            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                paramList.add(inputName + "=${" + inputName + "}"); // no prefix for backwards compatibility
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                final String paramName = "falcon_" + inputName; // prefix 'falcon' for new params
                Properties props = new Properties();
                propagateCommonCatalogTableProperties((CatalogStorage) storage, props, paramName);
                for (Object key : props.keySet()) {
                    paramList.add(key + "=${wf:conf('" + key + "')}");
                }

                paramList.add(paramName + "_filter=${wf:conf('" + paramName + "_partition_filter_"
                        + entity.getWorkflow().getEngine().name().toLowerCase() + "')}");
            }
        }
    }

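    /**
     * Adds one parameter per output feed. FileSystem outputs keep the plain feed name
     * for backwards compatibility; catalog (table) outputs get 'falcon_'-prefixed
     * properties plus an engine-specific partitions parameter.
     */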
    protected void addOutputFeedsAsParams(List<String> paramList, Cluster cluster) throws FalconException {
        if (entity.getOutputs() == null) {
            return;
        }

        for (Output output : entity.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                final String outputName = output.getName(); // no prefix for backwards compatibility
                paramList.add(outputName + "=${" + outputName + "}");
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                Properties props = new Properties();
                propagateCatalogTableProperties(output, (CatalogStorage) storage, props); // prefix is auto added
                for (Object key : props.keySet()) {
                    paramList.add(key + "=${wf:conf('" + key + "')}");
                }

                final String paramName = "falcon_" + output.getName(); // prefix 'falcon' for new params
                paramList.add(paramName + "_partitions=${wf:conf('" + paramName + "_partitions_"
                        + entity.getWorkflow().getEngine().name().toLowerCase() + "')}");
            }
        }
    }

    protected void propagateEntityProperties(CONFIGURATION conf, List<String> paramList) {
        Properties entityProperties = getEntityProperties(entity);

        // Propagate user defined properties to job configuration
        final List<org.apache.falcon.oozie.workflow.CONFIGURATION.Property> configuration = conf.getProperty();

        // Propagate user defined properties to pig script as macros
        // passed as parameters -p name=value that can be accessed as $name
        for (Entry<Object, Object> entry : entityProperties.entrySet()) {
            org.apache.falcon.oozie.workflow.CONFIGURATION.Property configProperty
                    = new org.apache.falcon.oozie.workflow.CONFIGURATION.Property();
            configProperty.setName((String) entry.getKey());
            configProperty.setValue((String) entry.getValue());
            configuration.add(configProperty);

            paramList.add(entry.getKey() + "=" + entry.getValue());
        }
    }

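    /**
     * Returns the output paths to be deleted in the workflow's prepare block.
     * Only FileSystem outputs are included; prepare-delete does not apply to
     * table storage.
     */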
    protected List<String> getPrepareDeleteOutputPathList() throws FalconException {
        final List<String> deleteList = new ArrayList<String>();
        if (entity.getOutputs() == null) {
            return deleteList;
        }

        for (Output output : entity.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());

            if (FeedHelper.getStorageType(feed) == Storage.TYPE.TABLE) {
                continue; // prepare delete only applies to FileSystem storage
            }

            deleteList.add("${wf:conf('" + output.getName() + "')}");
        }

        return deleteList;
    }

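    /**
     * Adds the user's custom jars to the action's archive list. A single file is
     * added as-is; a directory contributes every jar file directly under it.
     */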
    protected void addArchiveForCustomJars(Cluster cluster, List<String> archiveList, String lib)
            throws FalconException {
        if (StringUtils.isEmpty(lib)) {
            return;
        }

        Path libPath = new Path(lib);
        try {
            final FileSystem fs = HadoopClientFactory.get()
                    .createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
            if (fs.isFile(libPath)) { // File, not a Dir
                archiveList.add(libPath.toString());
                return;
            }

            // lib path is a directory, add each file under the lib dir to archive
            final FileStatus[] fileStatuses = fs.listStatus(libPath, new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    try {
                        return fs.isFile(path) && path.getName().endsWith(".jar");
                    } catch (IOException ignore) {
                        return false;
                    }
                }
            });

            for (FileStatus fileStatus : fileStatuses) {
                archiveList.add(fileStatus.getPath().toString());
            }
        } catch (IOException e) {
            throw new FalconException("Error adding archive for custom jars under: " + libPath, e);
        }
    }

    private void propagateUserWorkflowProperties(Workflow processWorkflow, Properties props) {
        props.put("userWorkflowName",
                ProcessHelper.getProcessWorkflowName(processWorkflow.getName(), entity.getName()));
        props.put("userWorkflowVersion", processWorkflow.getVersion());
        props.put("userWorkflowEngine", processWorkflow.getEngine().value());
    }

    @Override
    protected WorkflowExecutionContext.EntityOperations getOperation() {
        return WorkflowExecutionContext.EntityOperations.GENERATE;
    }
}
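
Example

The builder above turns input and output feeds into workflow parameters using a simple naming convention: FileSystem feeds keep their plain names, while catalog (table) feeds get a "falcon_" prefix and an engine-specific suffix. The following is a minimal, self-contained sketch of that convention only; the feed names, engine value and class name are hypothetical and nothing here calls the Falcon API.

import java.util.ArrayList;
import java.util.List;

public class FeedParamNamingDemo {
    public static void main(String[] args) {
        List<String> params = new ArrayList<String>();

        // FileSystem feed: the parameter keeps the plain feed name (backwards compatible)
        String fsInput = "clicks";
        params.add(fsInput + "=${" + fsInput + "}");

        // Catalog (table) feed: 'falcon_' prefix plus an engine-specific filter parameter
        String tableInput = "impressions";
        String engine = "pig"; // stands in for entity.getWorkflow().getEngine().name().toLowerCase()
        String paramName = "falcon_" + tableInput;
        params.add(paramName + "_filter=${wf:conf('" + paramName
                + "_partition_filter_" + engine + "')}");

        for (String param : params) {
            System.out.println(param);
        }
        // Prints:
        // clicks=${clicks}
        // falcon_impressions_filter=${wf:conf('falcon_impressions_partition_filter_pig')}
    }
}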