org.apache.falcon.oozie.FeedImportCoordinatorBuilder.java Source code

Introduction

Here is the source code for org.apache.falcon.oozie.FeedImportCoordinatorBuilder.java. The class builds the Oozie coordinator application that drives database import for a Falcon feed.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.oozie;

import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.LifeCycle;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.oozie.coordinator.ACTION;
import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
import org.apache.falcon.oozie.coordinator.DATAOUT;
import org.apache.falcon.oozie.coordinator.DATASETS;
import org.apache.falcon.oozie.coordinator.OUTPUTEVENTS;
import org.apache.falcon.oozie.coordinator.SYNCDATASET;
import org.apache.falcon.oozie.coordinator.WORKFLOW;
import org.apache.hadoop.fs.Path;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Properties;

/**
 * Builds Oozie coordinator for database import.
 */
public class FeedImportCoordinatorBuilder extends OozieCoordinatorBuilder<Feed> {

    public static final String IMPORT_DATASET_NAME = "import-dataset";

    public static final String IMPORT_DATAOUT_NAME = "import-output";

    private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(FeedImportCoordinatorBuilder.class);

    public FeedImportCoordinatorBuilder(Feed entity) {
        super(entity, LifeCycle.IMPORT);
    }

    @Override
    public List<Properties> buildCoords(Cluster cluster, Path buildPath) throws FalconException {

        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster((Feed) entity,
                cluster.getName());
        if (!FeedHelper.isImportEnabled(feedCluster)) {
            return null;
        }

        if (feedCluster.getValidity().getEnd().before(new Date())) {
            LOG.warn("Feed IMPORT is not applicable as Feed's end time for cluster {} is not in the future",
                    cluster.getName());
            return null;
        }

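        // Assemble the coordinator app: its attributes, the output dataset/event, and the import workflow action.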
        COORDINATORAPP coord = new COORDINATORAPP();
        initializeCoordAttributes(coord, (Feed) entity, cluster);
        Properties props = createCoordDefaultConfiguration(getEntityName());
        initializeOutputPath(coord, cluster, props);

        props.putAll(FeedHelper.getUserWorkflowProperties(getLifecycle()));

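        // Build the orchestration workflow for the import and point the coordinator's action at its app path.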
        WORKFLOW workflow = new WORKFLOW();
        Path coordPath = getBuildPath(buildPath);
        Properties wfProp = OozieOrchestrationWorkflowBuilder.get(entity, cluster, Tag.IMPORT).build(cluster,
                coordPath);
        workflow.setAppPath(getStoragePath(wfProp.getProperty(OozieEntityBuilder.ENTITY_PATH)));
        props.putAll(wfProp);
        workflow.setConfiguration(getConfig(props));
        ACTION action = new ACTION();
        action.setWorkflow(workflow);

        coord.setAction(action);

        Path marshalPath = marshal(cluster, coord, coordPath);
        return Arrays.asList(getProperties(marshalPath, getEntityName()));
    }

    private void initializeOutputPath(COORDINATORAPP coord, Cluster cluster, Properties props)
            throws FalconException {

        if (coord.getDatasets() == null) {
            coord.setDatasets(new DATASETS());
        }

        if (coord.getOutputEvents() == null) {
            coord.setOutputEvents(new OUTPUTEVENTS());
        }

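        // Resolve the feed's storage on this cluster and register the import dataset and its output event.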
        Storage storage = FeedHelper.createStorage(cluster, (Feed) entity);
        SYNCDATASET syncdataset = createDataSet((Feed) entity, cluster, storage, IMPORT_DATASET_NAME,
                LocationType.DATA);

        if (syncdataset == null) {
            return;
        }
        coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);

        DATAOUT dataout = createDataOut(entity);
        coord.getOutputEvents().getDataOut().add(dataout);
    }

    private DATAOUT createDataOut(Feed feed) {
        DATAOUT dataout = new DATAOUT();
        dataout.setName(IMPORT_DATAOUT_NAME);
        dataout.setDataset(IMPORT_DATASET_NAME);
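        // ${coord:current(0)} resolves to the dataset instance whose time matches the coordinator action's nominal time.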
        dataout.setInstance("${coord:current(0)}");
        return dataout;
    }

    /**
     * Creates the coordinator dataset. The start instance is set to the current date if the merge
     * type is snapshot; otherwise, the feed cluster's start date is used as the start instance.
     *
     * @param feed the feed entity
     * @param cluster the cluster the coordinator runs on
     * @param storage the feed's storage on the cluster
     * @param datasetName name to assign to the dataset
     * @param locationType location type used to resolve the storage URI template
     * @return the dataset, or null if the storage has no URI template for the given location type
     * @throws FalconException
     */
    private SYNCDATASET createDataSet(Feed feed, Cluster cluster, Storage storage, String datasetName,
            LocationType locationType) throws FalconException {
        SYNCDATASET syncdataset = new SYNCDATASET();
        syncdataset.setName(datasetName);
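        // e.g. a feed frequency of hours(1) becomes the Oozie EL expression ${coord:hours(1)}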
        syncdataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");

        String uriTemplate = storage.getUriTemplate(locationType);
        if (StringUtils.isBlank(uriTemplate)) {
            return null;
        }
        if (storage.getType() == Storage.TYPE.TABLE) {
            uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires the hcat:// scheme for catalog table URIs
        }
        syncdataset.setUriTemplate(uriTemplate);

        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
        Date initialInstance = FeedHelper.getImportInitalInstance(feedCluster);
        syncdataset.setInitialInstance(SchemaHelper.formatDateUTC(initialInstance));
        syncdataset.setTimezone(feed.getTimezone().getID());

        if (StringUtils.isNotBlank(feed.getAvailabilityFlag())) {
            syncdataset.setDoneFlag(feed.getAvailabilityFlag());
        } else {
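            // An empty done-flag tells Oozie that the existence of the dataset directory itself signals availability.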
            syncdataset.setDoneFlag("");
        }

        return syncdataset;
    }

    /**
     * Initializes the coordinator attributes. The start date is the current date if the merge type
     * is snapshot; otherwise, the feed cluster's validity start is used as the coordinator start date.
     *
     * @param coord the coordinator app to initialize
     * @param feed the feed entity
     * @param cluster the cluster the coordinator runs on
     */
    private void initializeCoordAttributes(COORDINATORAPP coord, Feed feed, Cluster cluster) {
        coord.setName(getEntityName());
        // for feeds with snapshot layout, the start date will be the time of scheduling since it dumps whole table
        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
        Date initialInstance = FeedHelper.getImportInitalInstance(feedCluster);
        coord.setStart(SchemaHelper.formatDateUTC(initialInstance));
        coord.setEnd(SchemaHelper.formatDateUTC(feedCluster.getValidity().getEnd()));
        coord.setTimezone(entity.getTimezone().getID());
        coord.setFrequency("${coord:" + entity.getFrequency().toString() + "}");
    }
}
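
For context, here is a minimal sketch of how this builder might be driven. The driver class, the staging path, and the already-parsed feed and cluster entities are hypothetical placeholders; inside Falcon the builder is normally invoked by the scheduling engine rather than called directly.

import java.util.List;
import java.util.Properties;

import org.apache.falcon.FalconException;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.oozie.FeedImportCoordinatorBuilder;
import org.apache.hadoop.fs.Path;

public final class ImportCoordinatorExample {

    private ImportCoordinatorExample() {
    }

    /** Builds the import coordinator for a feed on one cluster and prints the resulting properties. */
    public static void buildImportCoordinator(Feed feed, Cluster cluster) throws FalconException {
        FeedImportCoordinatorBuilder builder = new FeedImportCoordinatorBuilder(feed);
        Path stagingPath = new Path("/falcon/staging/feed/import"); // hypothetical staging location
        List<Properties> coords = builder.buildCoords(cluster, stagingPath);
        if (coords == null) {
            // Import is disabled for this cluster, or the feed's validity end time has passed.
            return;
        }
        for (Properties coordProps : coords) {
            System.out.println(coordProps);
        }
    }
}

Note that buildCoords returns null rather than an empty list in the two skip cases, so callers need to handle that before iterating.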