org.apache.falcon.converter.OozieProcessMapper.java Source code

Introduction

Here is the source code for org.apache.falcon.converter.OozieProcessMapper.java, the Apache Falcon class that maps a Falcon process entity into the Oozie coordinator and workflow artifacts used to schedule it on a cluster.
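
Example usage

The snippet below is a minimal sketch, not part of the original file, showing how the mapper could be driven directly. The entity names ("sample-process", "primary-cluster"), the staging path and the wrapper class are illustrative assumptions; in practice Falcon's Oozie workflow builder invokes the mapper when a process is scheduled.

import org.apache.falcon.converter.OozieProcessMapper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
import org.apache.hadoop.fs.Path;

public class OozieProcessMapperExample {
    public static void main(String[] args) throws Exception {
        // Assumption: both entities are already registered in Falcon's configuration store.
        Process process = EntityUtil.getEntity(EntityType.PROCESS, "sample-process");
        Cluster cluster = EntityUtil.getEntity(EntityType.CLUSTER, "primary-cluster");

        // Hypothetical staging location for the generated Oozie bundle.
        Path bundlePath = new Path("/projects/falcon/staging/sample-process/bundle");

        // Map the Falcon process into an Oozie coordinator definition.
        OozieProcessMapper mapper = new OozieProcessMapper(process);
        COORDINATORAPP coord = mapper.createDefaultCoordinator(cluster, bundlePath);
        System.out.println("Generated coordinator: " + coord.getName());
    }
}

Note that createDefaultCoordinator also stages the parent workflow on the cluster's file system, so the sketch assumes a reachable cluster definition with valid interface endpoints.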

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.converter;

import org.apache.commons.lang.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.CatalogStorage;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.ProcessHelper;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.cluster.Interfacetype;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.process.EngineType;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.entity.v0.process.Property;
import org.apache.falcon.entity.v0.process.Workflow;
import org.apache.falcon.expression.ExpressionHelper;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
import org.apache.falcon.oozie.coordinator.CONTROLS;
import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
import org.apache.falcon.oozie.coordinator.DATAIN;
import org.apache.falcon.oozie.coordinator.DATAOUT;
import org.apache.falcon.oozie.coordinator.DATASETS;
import org.apache.falcon.oozie.coordinator.INPUTEVENTS;
import org.apache.falcon.oozie.coordinator.OUTPUTEVENTS;
import org.apache.falcon.oozie.coordinator.SYNCDATASET;
import org.apache.falcon.oozie.coordinator.WORKFLOW;
import org.apache.falcon.oozie.workflow.ACTION;
import org.apache.falcon.oozie.workflow.DELETE;
import org.apache.falcon.oozie.workflow.PIG;
import org.apache.falcon.oozie.workflow.PREPARE;
import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
import org.apache.falcon.update.UpdateHelper;
import org.apache.hadoop.fs.*;
import org.apache.xerces.dom.ElementNSImpl;
import org.w3c.dom.Document;

import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import javax.xml.transform.dom.DOMResult;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * This class maps the Falcon entities into Oozie artifacts.
 */
public class OozieProcessMapper extends AbstractOozieEntityMapper<Process> {
    private static final String DEFAULT_WF_TEMPLATE = "/config/workflow/process-parent-workflow.xml";
    private static final int THIRTY_MINUTES = 30 * 60 * 1000;

    public OozieProcessMapper(Process entity) {
        super(entity);
    }

    @Override
    protected List<COORDINATORAPP> getCoordinators(Cluster cluster, Path bundlePath) throws FalconException {
        try {
            FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));
            Process process = getEntity();

            //Copy user workflow and lib to staging dir
            Map<String, String> checksums = UpdateHelper.checksumAndCopy(fs,
                    new Path(process.getWorkflow().getPath()), new Path(bundlePath, EntityUtil.PROCESS_USER_DIR));
            if (process.getWorkflow().getLib() != null && fs.exists(new Path(process.getWorkflow().getLib()))) {
                checksums.putAll(UpdateHelper.checksumAndCopy(fs, new Path(process.getWorkflow().getLib()),
                        new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR)));
            }

            writeChecksums(fs, new Path(bundlePath, EntityUtil.PROCESS_CHECKSUM_FILE), checksums);
        } catch (IOException e) {
            throw new FalconException("Failed to copy user workflow/lib", e);
        }

        List<COORDINATORAPP> apps = new ArrayList<COORDINATORAPP>();
        apps.add(createDefaultCoordinator(cluster, bundlePath));

        return apps;
    }

    private void writeChecksums(FileSystem fs, Path path, Map<String, String> checksums) throws FalconException {
        try {
            FSDataOutputStream stream = fs.create(path);
            try {
                for (Map.Entry<String, String> entry : checksums.entrySet()) {
                    stream.write((entry.getKey() + "=" + entry.getValue() + "\n").getBytes());
                }
            } finally {
                stream.close();
            }
        } catch (IOException e) {
            throw new FalconException("Failed to copy user workflow/lib", e);
        }
    }

    private Path getUserWorkflowPath(Cluster cluster, Path bundlePath) throws FalconException {
        try {
            FileSystem fs = HadoopClientFactory.get()
                    .createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
            Process process = getEntity();
            Path wfPath = new Path(process.getWorkflow().getPath());
            if (fs.isFile(wfPath)) {
                return new Path(bundlePath, EntityUtil.PROCESS_USER_DIR + "/" + wfPath.getName());
            } else {
                return new Path(bundlePath, EntityUtil.PROCESS_USER_DIR);
            }
        } catch (IOException e) {
            throw new FalconException("Failed to get workflow path", e);
        }
    }

    private Path getUserLibPath(Cluster cluster, Path bundlePath) throws FalconException {
        try {
            Process process = getEntity();
            if (process.getWorkflow().getLib() == null) {
                return null;
            }
            Path libPath = new Path(process.getWorkflow().getLib());

            FileSystem fs = HadoopClientFactory.get()
                    .createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
            if (fs.isFile(libPath)) {
                return new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR + "/" + libPath.getName());
            } else {
                return new Path(bundlePath, EntityUtil.PROCESS_USERLIB_DIR);
            }
        } catch (IOException e) {
            throw new FalconException("Failed to get user lib path", e);
        }
    }

    /**
     * Creates the default Oozie coordinator.
     *
     * @param cluster    - Cluster for which the coordinator app needs to be created
     * @param bundlePath - bundle path
     * @return COORDINATORAPP
     * @throws FalconException on error
     */
    public COORDINATORAPP createDefaultCoordinator(Cluster cluster, Path bundlePath) throws FalconException {
        Process process = getEntity();
        if (process == null) {
            return null;
        }

        COORDINATORAPP coord = new COORDINATORAPP();
        String coordName = EntityUtil.getWorkflowName(Tag.DEFAULT, process).toString();
        Path coordPath = getCoordPath(bundlePath, coordName);

        // coord attributes
        initializeCoordAttributes(cluster, process, coord, coordName);

        CONTROLS controls = initializeControls(process); // controls
        coord.setControls(controls);

        // Configuration
        Map<String, String> props = createCoordDefaultConfiguration(cluster, coordPath, coordName);

        initializeInputPaths(cluster, process, coord, props); // inputs
        initializeOutputPaths(cluster, process, coord, props); // outputs

        Workflow processWorkflow = process.getWorkflow();
        props.put("userWorkflowEngine", processWorkflow.getEngine().value());

        // create parent wf
        createWorkflow(cluster, process, processWorkflow, coordName, coordPath);

        WORKFLOW wf = new WORKFLOW();
        wf.setAppPath(getStoragePath(coordPath.toString()));
        wf.setConfiguration(getCoordConfig(props));

        // set coord action to parent wf
        org.apache.falcon.oozie.coordinator.ACTION action = new org.apache.falcon.oozie.coordinator.ACTION();
        action.setWorkflow(wf);
        coord.setAction(action);

        return coord;
    }

    private void initializeCoordAttributes(Cluster cluster, Process process, COORDINATORAPP coord,
            String coordName) {
        coord.setName(coordName);
        org.apache.falcon.entity.v0.process.Cluster processCluster = ProcessHelper.getCluster(process,
                cluster.getName());
        coord.setStart(SchemaHelper.formatDateUTC(processCluster.getValidity().getStart()));
        coord.setEnd(SchemaHelper.formatDateUTC(processCluster.getValidity().getEnd()));
        coord.setTimezone(process.getTimezone().getID());
        coord.setFrequency("${coord:" + process.getFrequency().toString() + "}");
    }

    private CONTROLS initializeControls(Process process) throws FalconException {
        CONTROLS controls = new CONTROLS();
        controls.setConcurrency(String.valueOf(process.getParallel()));
        controls.setExecution(process.getOrder().name());

        Frequency timeout = process.getTimeout();
        long frequencyInMillis = ExpressionHelper.get().evaluate(process.getFrequency().toString(), Long.class);
        long timeoutInMillis;
        if (timeout != null) {
            timeoutInMillis = ExpressionHelper.get().evaluate(process.getTimeout().toString(), Long.class);
        } else {
            timeoutInMillis = frequencyInMillis * 6;
            if (timeoutInMillis < THIRTY_MINUTES) {
                timeoutInMillis = THIRTY_MINUTES;
            }
        }
        controls.setTimeout(String.valueOf(timeoutInMillis / (1000 * 60)));

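        // Division binds left-to-right: throttle = (timeoutInMillis / frequencyInMillis) * 2,
        // i.e. twice the number of nominal instances that fit within the timeout window.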
        if (timeoutInMillis / frequencyInMillis * 2 > 0) {
            controls.setThrottle(String.valueOf(timeoutInMillis / frequencyInMillis * 2));
        }

        return controls;
    }

    private void initializeInputPaths(Cluster cluster, Process process, COORDINATORAPP coord,
            Map<String, String> props) throws FalconException {
        if (process.getInputs() == null) {
            return;
        }

        List<String> inputFeeds = new ArrayList<String>();
        List<String> inputPaths = new ArrayList<String>();
        List<String> inputFeedStorageTypes = new ArrayList<String>();
        for (Input input : process.getInputs().getInputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            if (!input.isOptional()) {
                if (coord.getDatasets() == null) {
                    coord.setDatasets(new DATASETS());
                }
                if (coord.getInputEvents() == null) {
                    coord.setInputEvents(new INPUTEVENTS());
                }

                SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, input.getName(), LocationType.DATA);
                coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);

                DATAIN datain = createDataIn(input);
                coord.getInputEvents().getDataIn().add(datain);
            }

            String inputExpr = null;
            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                inputExpr = getELExpression("dataIn('" + input.getName() + "', '" + input.getPartition() + "')");
                props.put(input.getName(), inputExpr);
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                inputExpr = "${coord:dataIn('" + input.getName() + "')}";
                propagateCatalogTableProperties(input, (CatalogStorage) storage, props);
            }

            inputFeeds.add(input.getName());
            inputPaths.add(inputExpr);
            inputFeedStorageTypes.add(storage.getType().name());
        }

        propagateLateDataProperties(inputFeeds, inputPaths, inputFeedStorageTypes, props);
    }

    private void propagateLateDataProperties(List<String> inputFeeds, List<String> inputPaths,
            List<String> inputFeedStorageTypes, Map<String, String> props) {
        // populate late data handler - should-record action
        props.put("falconInputFeeds", join(inputFeeds.iterator(), '#'));
        props.put("falconInPaths", join(inputPaths.iterator(), '#'));

        // storage type for each corresponding feed sent as a param to LateDataHandler
        // needed to compute usage based on storage type in LateDataHandler
        props.put("falconInputFeedStorageTypes", join(inputFeedStorageTypes.iterator(), '#'));
    }

    private void initializeOutputPaths(Cluster cluster, Process process, COORDINATORAPP coord,
            Map<String, String> props) throws FalconException {
        if (process.getOutputs() == null) {
            return;
        }

        if (coord.getDatasets() == null) {
            coord.setDatasets(new DATASETS());
        }

        if (coord.getOutputEvents() == null) {
            coord.setOutputEvents(new OUTPUTEVENTS());
        }

        List<String> outputFeeds = new ArrayList<String>();
        List<String> outputPaths = new ArrayList<String>();
        for (Output output : process.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, output.getName(), LocationType.DATA);
            coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);

            DATAOUT dataout = createDataOut(output);
            coord.getOutputEvents().getDataOut().add(dataout);

            String outputExpr = "${coord:dataOut('" + output.getName() + "')}";
            outputFeeds.add(output.getName());
            outputPaths.add(outputExpr);

            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                props.put(output.getName(), outputExpr);

                propagateFileSystemProperties(output, feed, cluster, coord, storage, props);
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                propagateCatalogTableProperties(output, (CatalogStorage) storage, props);
            }
        }

        // Output feed name and path for parent workflow
        props.put(ARG.feedNames.getPropName(), join(outputFeeds.iterator(), ','));
        props.put(ARG.feedInstancePaths.getPropName(), join(outputPaths.iterator(), ','));
    }

    private SYNCDATASET createDataSet(Feed feed, Cluster cluster, Storage storage, String datasetName,
            LocationType locationType) throws FalconException {

        SYNCDATASET syncdataset = new SYNCDATASET();
        syncdataset.setName(datasetName);
        syncdataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");

        String uriTemplate = storage.getUriTemplate(locationType);
        if (storage.getType() == Storage.TYPE.TABLE) {
            uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
        }
        syncdataset.setUriTemplate(uriTemplate);

        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
        syncdataset.setInitialInstance(SchemaHelper.formatDateUTC(feedCluster.getValidity().getStart()));
        syncdataset.setTimezone(feed.getTimezone().getID());

        if (feed.getAvailabilityFlag() == null) {
            syncdataset.setDoneFlag("");
        } else {
            syncdataset.setDoneFlag(feed.getAvailabilityFlag());
        }

        return syncdataset;
    }

    private DATAOUT createDataOut(Output output) {
        DATAOUT dataout = new DATAOUT();
        dataout.setName(output.getName());
        dataout.setDataset(output.getName());
        dataout.setInstance(getELExpression(output.getInstance()));
        return dataout;
    }

    private DATAIN createDataIn(Input input) {
        DATAIN datain = new DATAIN();
        datain.setName(input.getName());
        datain.setDataset(input.getName());
        datain.setStartInstance(getELExpression(input.getStart()));
        datain.setEndInstance(getELExpression(input.getEnd()));
        return datain;
    }

    private void propagateFileSystemProperties(Output output, Feed feed, Cluster cluster, COORDINATORAPP coord,
            Storage storage, Map<String, String> props) throws FalconException {

        // stats and meta paths
        createOutputEvent(output, feed, cluster, LocationType.STATS, coord, props, storage);
        createOutputEvent(output, feed, cluster, LocationType.META, coord, props, storage);
        createOutputEvent(output, feed, cluster, LocationType.TMP, coord, props, storage);
    }

    //SUSPEND CHECKSTYLE CHECK ParameterNumberCheck
    private void createOutputEvent(Output output, Feed feed, Cluster cluster, LocationType locType,
            COORDINATORAPP coord, Map<String, String> props, Storage storage) throws FalconException {

        String name = output.getName();
        String type = locType.name().toLowerCase();

        SYNCDATASET dataset = createDataSet(feed, cluster, storage, name + type, locType);
        coord.getDatasets().getDatasetOrAsyncDataset().add(dataset);

        DATAOUT dataout = new DATAOUT();
        dataout.setName(name + type);
        dataout.setDataset(name + type);
        dataout.setInstance(getELExpression(output.getInstance()));

        OUTPUTEVENTS outputEvents = coord.getOutputEvents();
        if (outputEvents == null) {
            outputEvents = new OUTPUTEVENTS();
            coord.setOutputEvents(outputEvents);
        }
        outputEvents.getDataOut().add(dataout);

        String outputExpr = "${coord:dataOut('" + name + type + "')}";
        props.put(name + "." + type, outputExpr);
    }
    //RESUME CHECKSTYLE CHECK ParameterNumberCheck

    private void propagateCommonCatalogTableProperties(CatalogStorage tableStorage, Map<String, String> props,
            String prefix) {
        props.put(prefix + "_storage_type", tableStorage.getType().name());
        props.put(prefix + "_catalog_url", tableStorage.getCatalogUrl());
        props.put(prefix + "_database", tableStorage.getDatabase());
        props.put(prefix + "_table", tableStorage.getTable());
    }

    private void propagateCatalogTableProperties(Input input, CatalogStorage tableStorage,
            Map<String, String> props) {
        String prefix = "falcon_" + input.getName();

        propagateCommonCatalogTableProperties(tableStorage, props, prefix);

        props.put(prefix + "_partition_filter_pig",
                "${coord:dataInPartitionFilter('" + input.getName() + "', 'pig')}");
        props.put(prefix + "_partition_filter_hive",
                "${coord:dataInPartitionFilter('" + input.getName() + "', 'hive')}");
        props.put(prefix + "_partition_filter_java",
                "${coord:dataInPartitionFilter('" + input.getName() + "', 'java')}");
    }

    private void propagateCatalogTableProperties(Output output, CatalogStorage tableStorage,
            Map<String, String> props) {
        String prefix = "falcon_" + output.getName();

        propagateCommonCatalogTableProperties(tableStorage, props, prefix);

        props.put(prefix + "_dataout_partitions", "${coord:dataOutPartitions('" + output.getName() + "')}");
        props.put(prefix + "_dated_partition_value", "${coord:dataOutPartitionValue('" + output.getName() + "', '"
                + tableStorage.getDatedPartitionKey() + "')}");
    }

    private String join(Iterator<String> itr, char sep) {
        String joinedStr = StringUtils.join(itr, sep);
        if (joinedStr.isEmpty()) {
            joinedStr = "null";
        }
        return joinedStr;
    }

    private String getELExpression(String expr) {
        if (expr != null) {
            expr = "${" + expr + "}";
        }
        return expr;
    }

    @Override
    protected Map<String, String> getEntityProperties() {
        Process process = getEntity();
        Map<String, String> props = new HashMap<String, String>();
        if (process.getProperties() != null) {
            for (Property prop : process.getProperties().getProperties()) {
                props.put(prop.getName(), prop.getValue());
            }
        }
        return props;
    }

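    /**
     * Builds the parent Oozie workflow for the process: loads the workflow template, points
     * the user oozie/pig/hive action at the staged user workflow, adds Oozie retries to the
     * Falcon actions and marshals the result under the coordinator path.
     */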
    protected void createWorkflow(Cluster cluster, Process process, Workflow processWorkflow, String wfName,
            Path parentWfPath) throws FalconException {
        WORKFLOWAPP wfApp = getWorkflowTemplate(DEFAULT_WF_TEMPLATE);
        wfApp.setName(wfName);
        try {
            addLibExtensionsToWorkflow(cluster, wfApp, EntityType.PROCESS, null);
        } catch (IOException e) {
            throw new FalconException("Failed to add library extensions for the workflow", e);
        }

        String userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent()).toString();
        EngineType engineType = processWorkflow.getEngine();
        for (Object object : wfApp.getDecisionOrForkOrJoin()) {
            if (!(object instanceof ACTION)) {
                continue;
            }

            ACTION action = (ACTION) object;
            String actionName = action.getName();
            if (engineType == EngineType.OOZIE && actionName.equals("user-oozie-workflow")) {
                action.getSubWorkflow().setAppPath("${nameNode}" + userWfPath);
            } else if (engineType == EngineType.PIG && actionName.equals("user-pig-job")) {
                decoratePIGAction(cluster, process, action.getPig(), parentWfPath);
            } else if (engineType == EngineType.HIVE && actionName.equals("user-hive-job")) {
                decorateHiveAction(cluster, process, action, parentWfPath);
            } else if (FALCON_ACTIONS.contains(actionName)) {
                decorateWithOozieRetries(action);
            }
        }

        //Create parent workflow
        marshal(cluster, wfApp, parentWfPath);
    }

    private void decoratePIGAction(Cluster cluster, Process process, PIG pigAction, Path parentWfPath)
            throws FalconException {
        Path userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent());
        pigAction.setScript("${nameNode}" + userWfPath.toString());

        addPrepareDeleteOutputPath(process, pigAction);

        final List<String> paramList = pigAction.getParam();
        addInputFeedsAsParams(paramList, process, cluster, EngineType.PIG.name().toLowerCase());
        addOutputFeedsAsParams(paramList, process, cluster);

        propagateProcessProperties(pigAction, process);

        Storage.TYPE storageType = getStorageType(cluster, process);
        if (Storage.TYPE.TABLE == storageType) {
            // adds hive-site.xml in pig classpath
            setupHiveConfiguration(cluster, parentWfPath, ""); // DO NOT ADD PREFIX!!!
            pigAction.getFile().add("${wf:appPath()}/conf/hive-site.xml");
        }

        addArchiveForCustomJars(cluster, pigAction.getArchive(), getUserLibPath(cluster, parentWfPath.getParent()));
    }

    private void decorateHiveAction(Cluster cluster, Process process, ACTION wfAction, Path parentWfPath)
            throws FalconException {

        JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionJaxbElement = unMarshalHiveAction(wfAction);
        org.apache.falcon.oozie.hive.ACTION hiveAction = actionJaxbElement.getValue();

        Path userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent());
        hiveAction.setScript("${nameNode}" + userWfPath.toString());

        addPrepareDeleteOutputPath(process, hiveAction);

        final List<String> paramList = hiveAction.getParam();
        addInputFeedsAsParams(paramList, process, cluster, EngineType.HIVE.name().toLowerCase());
        addOutputFeedsAsParams(paramList, process, cluster);

        propagateProcessProperties(hiveAction, process);

        setupHiveConfiguration(cluster, parentWfPath, "falcon-");

        addArchiveForCustomJars(cluster, hiveAction.getArchive(),
                getUserLibPath(cluster, parentWfPath.getParent()));

        marshalHiveAction(wfAction, actionJaxbElement);
    }

    private void addPrepareDeleteOutputPath(Process process, PIG pigAction) throws FalconException {
        List<String> deleteOutputPathList = getPrepareDeleteOutputPathList(process);
        if (deleteOutputPathList.isEmpty()) {
            return;
        }

        final PREPARE prepare = new PREPARE();
        final List<DELETE> deleteList = prepare.getDelete();

        for (String deletePath : deleteOutputPathList) {
            final DELETE delete = new DELETE();
            delete.setPath(deletePath);
            deleteList.add(delete);
        }

        if (!deleteList.isEmpty()) {
            pigAction.setPrepare(prepare);
        }
    }

    private void addPrepareDeleteOutputPath(Process process, org.apache.falcon.oozie.hive.ACTION hiveAction)
            throws FalconException {

        List<String> deleteOutputPathList = getPrepareDeleteOutputPathList(process);
        if (deleteOutputPathList.isEmpty()) {
            return;
        }

        org.apache.falcon.oozie.hive.PREPARE prepare = new org.apache.falcon.oozie.hive.PREPARE();
        List<org.apache.falcon.oozie.hive.DELETE> deleteList = prepare.getDelete();

        for (String deletePath : deleteOutputPathList) {
            org.apache.falcon.oozie.hive.DELETE delete = new org.apache.falcon.oozie.hive.DELETE();
            delete.setPath(deletePath);
            deleteList.add(delete);
        }

        if (!deleteList.isEmpty()) {
            hiveAction.setPrepare(prepare);
        }
    }

    private List<String> getPrepareDeleteOutputPathList(Process process) throws FalconException {
        final List<String> deleteList = new ArrayList<String>();
        for (Output output : process.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());

            if (FeedHelper.getStorageType(feed) == Storage.TYPE.TABLE) {
                continue; // prepare delete only applies to FileSystem storage
            }

            deleteList.add("${wf:conf('" + output.getName() + "')}");
        }

        return deleteList;
    }

    private void addInputFeedsAsParams(List<String> paramList, Process process, Cluster cluster, String engineType)
            throws FalconException {
        for (Input input : process.getInputs().getInputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            final String inputName = input.getName();
            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                paramList.add(inputName + "=${" + inputName + "}"); // no prefix for backwards compatibility
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                final String paramName = "falcon_" + inputName; // prefix 'falcon' for new params
                Map<String, String> props = new HashMap<String, String>();
                propagateCommonCatalogTableProperties((CatalogStorage) storage, props, paramName);
                for (String key : props.keySet()) {
                    paramList.add(key + "=${wf:conf('" + key + "')}");
                }

                paramList.add(
                        paramName + "_filter=${wf:conf('" + paramName + "_partition_filter_" + engineType + "')}");
            }
        }
    }

    private void addOutputFeedsAsParams(List<String> paramList, Process process, Cluster cluster)
            throws FalconException {
        for (Output output : process.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                final String outputName = output.getName(); // no prefix for backwards compatibility
                paramList.add(outputName + "=${" + outputName + "}");
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                Map<String, String> props = new HashMap<String, String>();
                propagateCatalogTableProperties(output, (CatalogStorage) storage, props); // prefix is auto added
                for (String key : props.keySet()) {
                    paramList.add(key + "=${wf:conf('" + key + "')}");
                }
            }
        }
    }

    private void propagateProcessProperties(PIG pigAction, Process process) {
        org.apache.falcon.entity.v0.process.Properties processProperties = process.getProperties();
        if (processProperties == null) {
            return;
        }

        // Propagate user defined properties to job configuration
        final List<org.apache.falcon.oozie.workflow.CONFIGURATION.Property> configuration = pigAction
                .getConfiguration().getProperty();

        // Propagate user defined properties to pig script as macros
        // passed as parameters -p name=value that can be accessed as $name
        final List<String> paramList = pigAction.getParam();

        for (org.apache.falcon.entity.v0.process.Property property : processProperties.getProperties()) {
            org.apache.falcon.oozie.workflow.CONFIGURATION.Property configProperty =
                    new org.apache.falcon.oozie.workflow.CONFIGURATION.Property();
            configProperty.setName(property.getName());
            configProperty.setValue(property.getValue());
            configuration.add(configProperty);

            paramList.add(property.getName() + "=" + property.getValue());
        }
    }

    private void propagateProcessProperties(org.apache.falcon.oozie.hive.ACTION hiveAction, Process process) {
        org.apache.falcon.entity.v0.process.Properties processProperties = process.getProperties();
        if (processProperties == null) {
            return;
        }

        // Propagate user defined properties to job configuration
        final List<org.apache.falcon.oozie.hive.CONFIGURATION.Property> configuration = hiveAction
                .getConfiguration().getProperty();

        // Propagate user defined properties to the hive script
        // passed as parameters name=value that can be accessed in the script as ${name}
        final List<String> paramList = hiveAction.getParam();

        for (org.apache.falcon.entity.v0.process.Property property : processProperties.getProperties()) {
            org.apache.falcon.oozie.hive.CONFIGURATION.Property configProperty =
                    new org.apache.falcon.oozie.hive.CONFIGURATION.Property();
            configProperty.setName(property.getName());
            configProperty.setValue(property.getValue());
            configuration.add(configProperty);

            paramList.add(property.getName() + "=" + property.getValue());
        }
    }

    private Storage.TYPE getStorageType(Cluster cluster, Process process) throws FalconException {
        Storage.TYPE storageType = Storage.TYPE.FILESYSTEM;
        if (process.getInputs() == null) {
            return storageType;
        }

        for (Input input : process.getInputs().getInputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
            storageType = FeedHelper.getStorageType(feed, cluster);
            if (Storage.TYPE.TABLE == storageType) {
                break;
            }
        }

        return storageType;
    }

    // creates hive-site.xml configuration in conf dir.
    private void setupHiveConfiguration(Cluster cluster, Path wfPath, String prefix) throws FalconException {
        String catalogUrl = ClusterHelper.getInterface(cluster, Interfacetype.REGISTRY).getEndpoint();
        try {
            FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));
            Path confPath = new Path(wfPath, "conf");
            createHiveConf(fs, confPath, catalogUrl, cluster, prefix);
        } catch (IOException e) {
            throw new FalconException(e);
        }
    }

    private void addArchiveForCustomJars(Cluster cluster, List<String> archiveList, Path libPath)
            throws FalconException {
        if (libPath == null) {
            return;
        }

        try {
            final FileSystem fs = libPath.getFileSystem(ClusterHelper.getConfiguration(cluster));
            if (fs.isFile(libPath)) { // File, not a Dir
                archiveList.add(libPath.toString());
                return;
            }

            // lib path is a directory, add each file under the lib dir to archive
            final FileStatus[] fileStatuses = fs.listStatus(libPath, new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    try {
                        return fs.isFile(path) && path.getName().endsWith(".jar");
                    } catch (IOException ignore) {
                        return false;
                    }
                }
            });

            for (FileStatus fileStatus : fileStatuses) {
                archiveList.add(fileStatus.getPath().toString());
            }
        } catch (IOException e) {
            throw new FalconException("Error adding archive for custom jars under: " + libPath, e);
        }
    }

    @SuppressWarnings("unchecked")
    protected JAXBElement<org.apache.falcon.oozie.hive.ACTION> unMarshalHiveAction(ACTION wfAction) {
        try {
            Unmarshaller unmarshaller = HIVE_ACTION_JAXB_CONTEXT.createUnmarshaller();
            unmarshaller.setEventHandler(new javax.xml.bind.helpers.DefaultValidationEventHandler());
            return (JAXBElement<org.apache.falcon.oozie.hive.ACTION>) unmarshaller
                    .unmarshal((ElementNSImpl) wfAction.getAny());
        } catch (JAXBException e) {
            throw new RuntimeException("Unable to unmarshall hive action.", e);
        }
    }

    protected void marshalHiveAction(ACTION wfAction,
            JAXBElement<org.apache.falcon.oozie.hive.ACTION> actionjaxbElement) {
        try {
            DOMResult hiveActionDOM = new DOMResult();
            Marshaller marshaller = HIVE_ACTION_JAXB_CONTEXT.createMarshaller();
            marshaller.marshal(actionjaxbElement, hiveActionDOM);
            wfAction.setAny(((Document) hiveActionDOM.getNode()).getDocumentElement());
        } catch (JAXBException e) {
            throw new RuntimeException("Unable to marshall hive action.", e);
        }
    }
}