org.apache.falcon.oozie.process.ProcessExecutionCoordinatorBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.falcon.oozie.process.ProcessExecutionCoordinatorBuilder.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.oozie.process;

import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.LifeCycle;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.CatalogStorage;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.ProcessHelper;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.expression.ExpressionHelper;
import org.apache.falcon.oozie.OozieCoordinatorBuilder;
import org.apache.falcon.oozie.OozieEntityBuilder;
import org.apache.falcon.oozie.OozieOrchestrationWorkflowBuilder;
import org.apache.falcon.oozie.coordinator.CONTROLS;
import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
import org.apache.falcon.oozie.coordinator.DATAIN;
import org.apache.falcon.oozie.coordinator.DATAOUT;
import org.apache.falcon.oozie.coordinator.DATASETS;
import org.apache.falcon.oozie.coordinator.INPUTEVENTS;
import org.apache.falcon.oozie.coordinator.OUTPUTEVENTS;
import org.apache.falcon.oozie.coordinator.SYNCDATASET;
import org.apache.falcon.oozie.coordinator.WORKFLOW;
import org.apache.falcon.workflow.WorkflowExecutionArgs;
import org.apache.hadoop.fs.Path;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

/**
 * Builds oozie coordinator for process.
 */
public class ProcessExecutionCoordinatorBuilder extends OozieCoordinatorBuilder<Process> {
    private static final int THIRTY_MINUTES = 30 * 60 * 1000;

    public ProcessExecutionCoordinatorBuilder(Process entity) {
        super(entity, LifeCycle.EXECUTION);
    }

    @Override
    public List<Properties> buildCoords(Cluster cluster, Path buildPath) throws FalconException {
        String coordName = getEntityName();
        Path coordPath = getBuildPath(buildPath);
        copySharedLibs(cluster, new Path(coordPath, "lib"));

        COORDINATORAPP coord = new COORDINATORAPP();
        // coord attributes
        initializeCoordAttributes(cluster, coord, coordName);

        CONTROLS controls = initializeControls(); // controls
        coord.setControls(controls);

        // Configuration
        Properties props = createCoordDefaultConfiguration(coordName);

        initializeInputPaths(cluster, coord, props); // inputs
        initializeOutputPaths(cluster, coord, props); // outputs

        // create parent wf
        Properties wfProps = OozieOrchestrationWorkflowBuilder.get(entity, cluster, Tag.DEFAULT).build(cluster,
                coordPath);

        WORKFLOW wf = new WORKFLOW();
        wf.setAppPath(getStoragePath(wfProps.getProperty(OozieEntityBuilder.ENTITY_PATH)));
        // Add the custom properties set in feed. Else, dryrun won't catch any missing props.
        props.putAll(getEntityProperties(entity));
        wf.setConfiguration(getConfig(props));

        // set coord action to parent wf
        org.apache.falcon.oozie.coordinator.ACTION action = new org.apache.falcon.oozie.coordinator.ACTION();
        action.setWorkflow(wf);

        coord.setAction(action);

        Path marshalPath = marshal(cluster, coord, coordPath);
        return Arrays.asList(getProperties(marshalPath, coordName));
    }

    private void initializeCoordAttributes(Cluster cluster, COORDINATORAPP coord, String coordName) {
        coord.setName(coordName);
        org.apache.falcon.entity.v0.process.Cluster processCluster = ProcessHelper.getCluster(entity,
                cluster.getName());
        coord.setStart(SchemaHelper.formatDateUTC(processCluster.getValidity().getStart()));
        coord.setEnd(SchemaHelper.formatDateUTC(processCluster.getValidity().getEnd()));
        coord.setTimezone(entity.getTimezone().getID());
        coord.setFrequency("${coord:" + entity.getFrequency().toString() + "}");
    }

    private CONTROLS initializeControls() throws FalconException {
        CONTROLS controls = new CONTROLS();
        controls.setConcurrency(String.valueOf(entity.getParallel()));
        controls.setExecution(entity.getOrder().name());

        Frequency timeout = entity.getTimeout();
        long frequencyInMillis = ExpressionHelper.get().evaluate(entity.getFrequency().toString(), Long.class);
        long timeoutInMillis;
        if (timeout != null) {
            timeoutInMillis = ExpressionHelper.get().evaluate(entity.getTimeout().toString(), Long.class);
        } else {
            timeoutInMillis = frequencyInMillis * 6;
            if (timeoutInMillis < THIRTY_MINUTES) {
                timeoutInMillis = THIRTY_MINUTES;
            }
        }
        controls.setTimeout(String.valueOf(timeoutInMillis / (1000 * 60)));

        if (timeoutInMillis / frequencyInMillis * 2 > 0) {
            controls.setThrottle(String.valueOf(timeoutInMillis / frequencyInMillis * 2));
        }

        return controls;
    }

    private void initializeInputPaths(Cluster cluster, COORDINATORAPP coord, Properties props)
            throws FalconException {
        if (entity.getInputs() == null) {
            props.put(WorkflowExecutionArgs.INPUT_FEED_NAMES.getName(), NONE);
            props.put(WorkflowExecutionArgs.INPUT_FEED_PATHS.getName(), NONE);
            props.put(WorkflowExecutionArgs.INPUT_NAMES.getName(), NONE);
            return;
        }

        List<String> inputFeeds = new ArrayList<String>();
        List<String> inputNames = new ArrayList<String>();
        List<String> inputPaths = new ArrayList<String>();
        List<String> inputFeedStorageTypes = new ArrayList<String>();
        for (Input input : entity.getInputs().getInputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, input.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            if (!input.isOptional()) {
                if (coord.getDatasets() == null) {
                    coord.setDatasets(new DATASETS());
                }
                if (coord.getInputEvents() == null) {
                    coord.setInputEvents(new INPUTEVENTS());
                }

                SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, input.getName(), LocationType.DATA);
                if (syncdataset == null) {
                    return;
                }
                coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);

                DATAIN datain = createDataIn(input);
                coord.getInputEvents().getDataIn().add(datain);
            }

            String inputExpr = null;
            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                inputExpr = getELExpression("dataIn('" + input.getName() + "', '" + input.getPartition() + "')");
                props.put(input.getName(), inputExpr);
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                inputExpr = "${coord:dataIn('" + input.getName() + "')}";
                propagateCatalogTableProperties(input, (CatalogStorage) storage, props);
            }

            inputFeeds.add(feed.getName());
            inputPaths.add(inputExpr);
            inputNames.add(input.getName());
            inputFeedStorageTypes.add(storage.getType().name());
        }

        propagateLateDataProperties(inputFeeds, inputNames, inputPaths, inputFeedStorageTypes, props);
    }

    private void propagateLateDataProperties(List<String> inputFeeds, List<String> inputNames,
            List<String> inputPaths, List<String> inputFeedStorageTypes, Properties props) {
        // populate late data handler - should-record action
        props.put(WorkflowExecutionArgs.INPUT_FEED_NAMES.getName(), StringUtils.join(inputFeeds, '#'));
        props.put(WorkflowExecutionArgs.INPUT_NAMES.getName(), StringUtils.join(inputNames, '#'));
        props.put(WorkflowExecutionArgs.INPUT_FEED_PATHS.getName(), StringUtils.join(inputPaths, '#'));

        // storage type for each corresponding feed sent as a param to LateDataHandler
        // needed to compute usage based on storage type in LateDataHandler
        props.put(WorkflowExecutionArgs.INPUT_STORAGE_TYPES.getName(),
                StringUtils.join(inputFeedStorageTypes, '#'));
    }

    private SYNCDATASET createDataSet(Feed feed, Cluster cluster, Storage storage, String datasetName,
            LocationType locationType) throws FalconException {
        SYNCDATASET syncdataset = new SYNCDATASET();
        syncdataset.setName(datasetName);
        syncdataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");

        String uriTemplate = storage.getUriTemplate(locationType);
        if (uriTemplate == null) {
            return null;
        }
        if (storage.getType() == Storage.TYPE.TABLE) {
            uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
        }
        syncdataset.setUriTemplate(uriTemplate);

        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
        syncdataset.setInitialInstance(SchemaHelper.formatDateUTC(feedCluster.getValidity().getStart()));
        syncdataset.setTimezone(feed.getTimezone().getID());

        if (feed.getAvailabilityFlag() == null) {
            syncdataset.setDoneFlag("");
        } else {
            syncdataset.setDoneFlag(feed.getAvailabilityFlag());
        }

        return syncdataset;
    }

    private DATAIN createDataIn(Input input) {
        DATAIN datain = new DATAIN();
        datain.setName(input.getName());
        datain.setDataset(input.getName());
        datain.setStartInstance(getELExpression(input.getStart()));
        datain.setEndInstance(getELExpression(input.getEnd()));
        return datain;
    }

    private String getELExpression(String expr) {
        if (expr != null) {
            expr = "${" + expr + "}";
        }
        return expr;
    }

    private void initializeOutputPaths(Cluster cluster, COORDINATORAPP coord, Properties props)
            throws FalconException {
        if (entity.getOutputs() == null) {
            props.put(WorkflowExecutionArgs.OUTPUT_FEED_NAMES.getName(), NONE);
            props.put(WorkflowExecutionArgs.OUTPUT_FEED_PATHS.getName(), NONE);
            return;
        }

        if (coord.getDatasets() == null) {
            coord.setDatasets(new DATASETS());
        }

        if (coord.getOutputEvents() == null) {
            coord.setOutputEvents(new OUTPUTEVENTS());
        }

        List<String> outputFeeds = new ArrayList<String>();
        List<String> outputPaths = new ArrayList<String>();
        for (Output output : entity.getOutputs().getOutputs()) {
            Feed feed = EntityUtil.getEntity(EntityType.FEED, output.getFeed());
            Storage storage = FeedHelper.createStorage(cluster, feed);

            SYNCDATASET syncdataset = createDataSet(feed, cluster, storage, output.getName(), LocationType.DATA);
            if (syncdataset == null) {
                return;
            }
            coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);

            DATAOUT dataout = createDataOut(output);
            coord.getOutputEvents().getDataOut().add(dataout);

            String outputExpr = "${coord:dataOut('" + output.getName() + "')}";
            outputFeeds.add(feed.getName());
            outputPaths.add(outputExpr);

            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                props.put(output.getName(), outputExpr);

                propagateFileSystemProperties(output, feed, cluster, coord, storage, props);
            } else if (storage.getType() == Storage.TYPE.TABLE) {
                propagateCatalogTableProperties(output, (CatalogStorage) storage, props);
            }
        }

        // Output feed name and path for parent workflow
        props.put(WorkflowExecutionArgs.OUTPUT_FEED_NAMES.getName(), StringUtils.join(outputFeeds, ','));
        props.put(WorkflowExecutionArgs.OUTPUT_FEED_PATHS.getName(), StringUtils.join(outputPaths, ','));
    }

    private DATAOUT createDataOut(Output output) {
        DATAOUT dataout = new DATAOUT();
        dataout.setName(output.getName());
        dataout.setDataset(output.getName());
        dataout.setInstance(getELExpression(output.getInstance()));
        return dataout;
    }

    private void propagateFileSystemProperties(Output output, Feed feed, Cluster cluster, COORDINATORAPP coord,
            Storage storage, Properties props) throws FalconException {
        // stats and meta paths
        createOutputEvent(output, feed, cluster, LocationType.STATS, coord, props, storage);
        createOutputEvent(output, feed, cluster, LocationType.META, coord, props, storage);
        createOutputEvent(output, feed, cluster, LocationType.TMP, coord, props, storage);
    }

    //SUSPEND CHECKSTYLE CHECK ParameterNumberCheck
    private void createOutputEvent(Output output, Feed feed, Cluster cluster, LocationType locType,
            COORDINATORAPP coord, Properties props, Storage storage) throws FalconException {
        String name = output.getName();
        String type = locType.name().toLowerCase();

        SYNCDATASET dataset = createDataSet(feed, cluster, storage, name + type, locType);
        if (dataset == null) {
            return;
        }
        coord.getDatasets().getDatasetOrAsyncDataset().add(dataset);

        DATAOUT dataout = new DATAOUT();
        dataout.setName(name + type);
        dataout.setDataset(name + type);
        dataout.setInstance(getELExpression(output.getInstance()));

        OUTPUTEVENTS outputEvents = coord.getOutputEvents();
        if (outputEvents == null) {
            outputEvents = new OUTPUTEVENTS();
            coord.setOutputEvents(outputEvents);
        }
        outputEvents.getDataOut().add(dataout);

        String outputExpr = "${coord:dataOut('" + name + type + "')}";
        props.put(name + "." + type, outputExpr);
    }
    //RESUME CHECKSTYLE CHECK ParameterNumberCheck

    protected void propagateCatalogTableProperties(Input input, CatalogStorage tableStorage, Properties props) {
        String prefix = "falcon_" + input.getName();

        propagateCommonCatalogTableProperties(tableStorage, props, prefix);

        props.put(prefix + "_partition_filter_pig",
                "${coord:dataInPartitionFilter('" + input.getName() + "', 'pig')}");
        props.put(prefix + "_partition_filter_hive",
                "${coord:dataInPartitionFilter('" + input.getName() + "', 'hive')}");
        props.put(prefix + "_partition_filter_java",
                "${coord:dataInPartitionFilter('" + input.getName() + "', 'java')}");
    }
}