org.apache.falcon.lifecycle.engine.oozie.utils.OozieBuilderUtils.java Source code

Introduction

Here is the source code for org.apache.falcon.lifecycle.engine.oozie.utils.OozieBuilderUtils.java, a utility class that Apache Falcon's lifecycle engine uses to build and stage Oozie artifacts: workflow and coordinator definitions, default configuration, credentials, and library extensions.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.lifecycle.engine.oozie.utils;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.ExternalId;
import org.apache.falcon.entity.HiveUtil;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.cluster.ClusterLocationType;
import org.apache.falcon.entity.v0.cluster.Interfacetype;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.oozie.coordinator.CONFIGURATION;
import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
import org.apache.falcon.oozie.workflow.ACTION;
import org.apache.falcon.oozie.workflow.CREDENTIAL;
import org.apache.falcon.oozie.workflow.CREDENTIALS;
import org.apache.falcon.oozie.workflow.END;
import org.apache.falcon.oozie.workflow.KILL;
import org.apache.falcon.oozie.workflow.START;
import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
import org.apache.falcon.security.SecurityUtil;
import org.apache.falcon.util.RuntimeProperties;
import org.apache.falcon.util.StartupProperties;
import org.apache.falcon.workflow.WorkflowExecutionArgs;
import org.apache.falcon.workflow.WorkflowExecutionContext;
import org.apache.falcon.workflow.engine.AbstractWorkflowEngine;
import org.apache.falcon.workflow.util.OozieConstants;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.oozie.client.OozieClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import javax.xml.namespace.QName;
import javax.xml.transform.stream.StreamSource;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Utility class to build Oozie artifacts.
 */
public final class OozieBuilderUtils {
    private static final Logger LOG = LoggerFactory.getLogger(OozieBuilderUtils.class);

    private static final String POSTPROCESS_TEMPLATE = "/action/post-process.xml";

    public static final String HIVE_CREDENTIAL_NAME = "falconHiveAuth";
    public static final String MR_QUEUE_NAME = "queueName";
    public static final String MR_JOB_PRIORITY = "jobPriority";
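    // Oozie coordinator EL expressions, resolved when each coordinator action materializes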
    private static final String NOMINAL_TIME_EL = "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}";
    private static final String ACTUAL_TIME_EL = "${coord:formatTime(coord:actualTime(), 'yyyy-MM-dd-HH-mm')}";
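    // default JMS broker message TTL ("broker.ttlInMins"): three days, expressed in minutes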
    private static final Long DEFAULT_BROKER_MSG_TTL = 3 * 24 * 60L;

    private static final JAXBContext WORKFLOW_JAXB_CONTEXT;
    private static final JAXBContext ACTION_JAXB_CONTEXT;
    private static final JAXBContext COORD_JAXB_CONTEXT;
    private static final JAXBContext CONFIG_JAXB_CONTEXT;

    public static final String SUCCESS_POSTPROCESS_ACTION_NAME = "succeeded-post-processing";
    public static final String FAIL_POSTPROCESS_ACTION_NAME = "failed-post-processing";
    public static final String OK_ACTION_NAME = "end";
    public static final String FAIL_ACTION_NAME = "fail";

    public static final String ENTITY_PATH = "ENTITY_PATH";
    public static final String ENTITY_NAME = "ENTITY_NAME";
    public static final String IGNORE = "IGNORE";

    static {
        try {
            WORKFLOW_JAXB_CONTEXT = JAXBContext.newInstance(WORKFLOWAPP.class);
            ACTION_JAXB_CONTEXT = JAXBContext.newInstance(org.apache.falcon.oozie.workflow.ACTION.class);
            COORD_JAXB_CONTEXT = JAXBContext.newInstance(COORDINATORAPP.class);
            CONFIG_JAXB_CONTEXT = JAXBContext.newInstance(org.apache.falcon.oozie.workflow.CONFIGURATION.class);
        } catch (JAXBException e) {
            throw new RuntimeException("Unable to create JAXB context", e);
        }
    }

    private OozieBuilderUtils() {

    }

    public static ACTION addTransition(ACTION action, String ok, String fail) {
        // XTODOS: why return the action when it mutates the same object in place?
        action.getOk().setTo(ok);
        action.getError().setTo(fail);
        return action;
    }

    public static void decorateWorkflow(WORKFLOWAPP wf, String name, String startAction) {
        wf.setName(name);
        wf.setStart(new START());
        wf.getStart().setTo(startAction);

        wf.setEnd(new END());
        wf.getEnd().setName(OK_ACTION_NAME);

        KILL kill = new KILL();
        kill.setName(FAIL_ACTION_NAME);
        kill.setMessage("Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]");
        wf.getDecisionOrForkOrJoin().add(kill);
    }

    public static ACTION getSuccessPostProcessAction() throws FalconException {
        ACTION action = unmarshalAction(POSTPROCESS_TEMPLATE);
        decorateWithOozieRetries(action);
        return action;
    }

    public static ACTION getFailPostProcessAction() throws FalconException {
        ACTION action = unmarshalAction(POSTPROCESS_TEMPLATE);
        decorateWithOozieRetries(action);
        action.setName(FAIL_POSTPROCESS_ACTION_NAME);
        return action;
    }

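    /**
     * Serializes the given JAXB element to outPath on the cluster's file system
     * and returns that path.
     */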
    private static Path marshal(Cluster cluster, JAXBElement<?> jaxbElement, JAXBContext jaxbContext, Path outPath)
            throws FalconException {
        try {
            Marshaller marshaller = jaxbContext.createMarshaller();
            marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);

            if (LOG.isDebugEnabled()) {
                StringWriter writer = new StringWriter();
                marshaller.marshal(jaxbElement, writer);
                LOG.debug("Writing definition to {} on cluster {}", outPath, cluster.getName());
                LOG.debug(writer.getBuffer().toString());
            }

            FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(outPath.toUri(),
                    ClusterHelper.getConfiguration(cluster));
            OutputStream out = fs.create(outPath);
            try {
                marshaller.marshal(jaxbElement, out);
            } finally {
                out.close();
            }

            LOG.info("Marshalled {} to {}", jaxbElement.getDeclaredType(), outPath);
            return outPath;
        } catch (Exception e) {
            throw new FalconException("Unable to marshall app object", e);
        }
    }

    public static Path marshalCoordinator(Cluster cluster, COORDINATORAPP coord, Path outPath)
            throws FalconException {
        return marshal(cluster, new org.apache.falcon.oozie.coordinator.ObjectFactory().createCoordinatorApp(coord),
                COORD_JAXB_CONTEXT, new Path(outPath, "coordinator.xml"));
    }

    public static Path marshalDefaultConfig(Cluster cluster, WORKFLOWAPP workflowapp, Properties properties,
            Path outPath) throws FalconException {
        QName workflowQName = new org.apache.falcon.oozie.workflow.ObjectFactory().createWorkflowApp(workflowapp)
                .getName();
        org.apache.falcon.oozie.workflow.CONFIGURATION config = getWorkflowConfig(properties);
        JAXBElement<org.apache.falcon.oozie.workflow.CONFIGURATION> configJaxbElement = new JAXBElement(
                new QName(workflowQName.getNamespaceURI(), "configuration", workflowQName.getPrefix()),
                org.apache.falcon.oozie.workflow.CONFIGURATION.class, config);

        Path defaultConfigPath = new Path(outPath, "config-default.xml");
        return marshal(cluster, configJaxbElement, CONFIG_JAXB_CONTEXT, defaultConfigPath);
    }

    public static Path marshalWokflow(Cluster cluster, WORKFLOWAPP workflow, Path outPath) throws FalconException {
        return marshal(cluster, new org.apache.falcon.oozie.workflow.ObjectFactory().createWorkflowApp(workflow),
                WORKFLOW_JAXB_CONTEXT, new Path(outPath, "workflow.xml"));
    }

    public static <T> T unmarshal(String template, JAXBContext context, Class<T> cls) throws FalconException {
        InputStream resourceAsStream = null;
        try {
            resourceAsStream = OozieBuilderUtils.class.getResourceAsStream(template);
            Unmarshaller unmarshaller = context.createUnmarshaller();
            JAXBElement<T> jaxbElement = unmarshaller.unmarshal(new StreamSource(resourceAsStream), cls);
            return jaxbElement.getValue();
        } catch (JAXBException e) {
            throw new FalconException("Failed to unmarshal " + template, e);
        } finally {
            IOUtils.closeQuietly(resourceAsStream);
        }
    }

    public static ACTION unmarshalAction(String template) throws FalconException {
        return unmarshal(template, ACTION_JAXB_CONTEXT, ACTION.class);
    }

    // XTODOS: should we make these more specific to feeds?
    public static void addLibExtensionsToWorkflow(Cluster cluster, WORKFLOWAPP wf, Tag tag, EntityType type)
            throws FalconException {
        String libext = ClusterHelper.getLocation(cluster, ClusterLocationType.WORKING).getPath() + "/libext";
        FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(ClusterHelper.getConfiguration(cluster));
        try {
            addExtensionJars(fs, new Path(libext), wf);
            addExtensionJars(fs, new Path(libext, type.name()), wf);
            if (tag != null) {
                addExtensionJars(fs, new Path(libext, type.name() + "/" + tag.name().toLowerCase()), wf);
            }
        } catch (IOException e) {
            throw new FalconException(e);
        }
    }

    /**
     * Builds the properties that identify a staged entity artifact.
     *
     * @param path path at which the entity artifact is staged
     * @param name name of the entity
     * @return properties carrying ENTITY_PATH and ENTITY_NAME, or null if path is null
     */
    public static Properties getProperties(Path path, String name) {
        if (path == null) {
            return null;
        }
        Properties prop = new Properties();
        prop.setProperty(ENTITY_PATH, path.toString());
        prop.setProperty(ENTITY_NAME, name);
        return prop;
    }

    /**
     * Adds every jar found under the given directory as a {@code <file>} entry to
     * each java, pig and map-reduce action in the workflow. For feeds the searched
     * directories are libext, libext/FEED, libext/FEED/RETENTION and
     * libext/FEED/REPLICATION.
     *
     * @param fs   file system holding the extension jars
     * @param path directory whose jars are added to the workflow actions
     * @param wf   workflow to decorate
     * @throws IOException
     */
    public static void addExtensionJars(FileSystem fs, Path path, WORKFLOWAPP wf) throws IOException {
        FileStatus[] libs;
        try {
            libs = fs.listStatus(path);
        } catch (FileNotFoundException ignore) {
            //Ok if the libext is not configured
            return;
        }

        for (FileStatus lib : libs) {
            if (lib.isDirectory()) {
                continue;
            }

            for (Object obj : wf.getDecisionOrForkOrJoin()) {
                if (!(obj instanceof ACTION)) {
                    continue;
                }
                ACTION action = (ACTION) obj;
                List<String> files = null;
                if (action.getJava() != null) {
                    files = action.getJava().getFile();
                } else if (action.getPig() != null) {
                    files = action.getPig().getFile();
                } else if (action.getMapReduce() != null) {
                    files = action.getMapReduce().getFile();
                }
                if (files != null) {
                    files.add(lib.getPath().toString());
                }
            }
        }
    }

    public static void decorateWithOozieRetries(ACTION action) {
        Properties props = RuntimeProperties.get();
        action.setRetryMax(props.getProperty("falcon.parentworkflow.retry.max", "3"));
        action.setRetryInterval(props.getProperty("falcon.parentworkflow.retry.interval.secs", "1"));
    }

    // creates the default configuration that is written to config-default.xml
    public static Properties createDefaultConfiguration(Cluster cluster, Entity entity,
            WorkflowExecutionContext.EntityOperations operation) throws FalconException {
        Properties props = new Properties();
        props.put(WorkflowExecutionArgs.ENTITY_NAME.getName(), entity.getName());
        props.put(WorkflowExecutionArgs.ENTITY_TYPE.getName(), entity.getEntityType().name());
        props.put(WorkflowExecutionArgs.CLUSTER_NAME.getName(), cluster.getName());
        props.put(WorkflowExecutionArgs.DATASOURCE_NAME.getName(), "NA");
        props.put("falconDataOperation", operation.name());

        props.put(WorkflowExecutionArgs.LOG_DIR.getName(), getStoragePath(EntityUtil.getLogPath(cluster, entity)));
        props.put(WorkflowExecutionArgs.WF_ENGINE_URL.getName(), ClusterHelper.getOozieUrl(cluster));

        addLateDataProperties(props, entity);
        addBrokerProperties(cluster, props);

        props.put(MR_QUEUE_NAME, "default");
        props.put(MR_JOB_PRIORITY, "NORMAL");

        // properties provided in the entity override the default generated properties
        props.putAll(EntityUtil.getEntityProperties(entity));
        props.putAll(createAppProperties(cluster));
        return props;
    }

    // gets the cluster specific properties to be populated in config-default.xml
    private static Properties createAppProperties(Cluster cluster) throws FalconException {
        Properties properties = EntityUtil.getEntityProperties(cluster);
        properties.setProperty(AbstractWorkflowEngine.NAME_NODE, ClusterHelper.getStorageUrl(cluster));
        properties.setProperty(AbstractWorkflowEngine.JOB_TRACKER, ClusterHelper.getMREndPoint(cluster));
        properties.setProperty("colo.name", cluster.getColo());
        final String endpoint = ClusterHelper.getInterface(cluster, Interfacetype.WORKFLOW).getEndpoint();
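        // skip the system libpath setting when running against the in-process local Oozie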
        if (!OozieConstants.LOCAL_OOZIE.equals(endpoint)) {
            properties.setProperty(OozieClient.USE_SYSTEM_LIBPATH, "true");
        }
        properties.setProperty("falcon.libpath",
                ClusterHelper.getLocation(cluster, ClusterLocationType.WORKING).getPath() + "/lib");

        return properties;
    }

    // writes a hive-site.xml with the cluster's hive credentials into the conf dir under the given workflow path
    public static void createHiveConfiguration(Cluster cluster, Path workflowPath, String prefix)
            throws FalconException {
        Configuration hiveConf = getHiveCredentialsAsConf(cluster);

        try {
            Configuration conf = ClusterHelper.getConfiguration(cluster);
            FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(conf);

            // write the hive conf under the staging dir
            Path confPath = new Path(workflowPath + "/conf");

            persistHiveConfiguration(fs, confPath, hiveConf, prefix);
        } catch (IOException e) {
            throw new FalconException("Unable to create create hive site", e);
        }
    }

    private static void persistHiveConfiguration(FileSystem fs, Path confPath, Configuration hiveConf,
            String prefix) throws IOException {
        OutputStream out = null;
        try {
            out = fs.create(new Path(confPath, prefix + "hive-site.xml"));
            hiveConf.writeXml(out);
        } finally {
            IOUtils.closeQuietly(out);
        }
    }

    /**
     * Adds an HCatalog credential to the workflow. This is only necessary when
     * table storage is involved and secure mode is enabled.
     *
     * @param workflowApp    workflow xml
     * @param cluster        cluster entity
     * @param credentialName name of the credential to add
     */
    public static void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster, String credentialName) {
        CREDENTIALS credentials = workflowApp.getCredentials();
        if (credentials == null) {
            credentials = new CREDENTIALS();
        }

        credentials.getCredential().add(createHCatalogCredential(cluster, credentialName));

        // add credential for workflow
        workflowApp.setCredentials(credentials);
    }

    /**
     * Creates an HCatalog credential. This is only necessary when table storage
     * is involved and secure mode is enabled.
     *
     * @param cluster        cluster entity
     * @param credentialName credential name
     * @return CREDENTIAL object for the HCatalog metastore
     */
    public static CREDENTIAL createHCatalogCredential(Cluster cluster, String credentialName) {
        final String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);

        CREDENTIAL credential = new CREDENTIAL();
        credential.setName(credentialName);
        credential.setType("hcat");

        credential.getProperty().add(createProperty(HiveUtil.METASTROE_URI, metaStoreUrl));
        credential.getProperty().add(createProperty(SecurityUtil.METASTORE_PRINCIPAL,
                ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_KERBEROS_PRINCIPAL)));

        return credential;
    }

    public static CREDENTIAL.Property createProperty(String name, String value) {
        CREDENTIAL.Property property = new CREDENTIAL.Property();
        property.setName(name);
        property.setValue(value);
        return property;
    }

    private static Properties getHiveCredentials(Cluster cluster) {
        String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);
        if (metaStoreUrl == null) {
            throw new IllegalStateException("Registry interface is not defined in cluster: " + cluster.getName());
        }

        Properties hiveCredentials = new Properties();
        hiveCredentials.put(HiveUtil.METASTOREURIS, metaStoreUrl);
        hiveCredentials.put(HiveUtil.METASTORE_UGI, "true");
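        // derive the hcat:// endpoint for hcatNode from the thrift:// metastore URI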
        hiveCredentials.put(HiveUtil.NODE, metaStoreUrl.replace("thrift", "hcat"));
        hiveCredentials.put(HiveUtil.METASTROE_URI, metaStoreUrl);

        if (SecurityUtil.isSecurityEnabled()) {
            String principal = ClusterHelper.getPropertyValue(cluster,
                    SecurityUtil.HIVE_METASTORE_KERBEROS_PRINCIPAL);
            hiveCredentials.put(SecurityUtil.METASTORE_PRINCIPAL, principal);
            hiveCredentials.put(SecurityUtil.HIVE_METASTORE_KERBEROS_PRINCIPAL, principal);
            hiveCredentials.put(SecurityUtil.METASTORE_USE_THRIFT_SASL, "true");
        }

        return hiveCredentials;
    }

    private static Configuration getHiveCredentialsAsConf(Cluster cluster) {
        Properties hiveCredentials = getHiveCredentials(cluster);

        Configuration hiveConf = new Configuration(false);
        for (Map.Entry<Object, Object> entry : hiveCredentials.entrySet()) {
            hiveConf.set((String) entry.getKey(), (String) entry.getValue());
        }

        return hiveConf;
    }

    public static Path getBuildPath(Path buildPath, Tag tag) {
        return new Path(buildPath, tag.name());
    }

    protected static String getStoragePath(Path path) {
        if (path != null) {
            return getStoragePath(path.toString());
        }
        return null;
    }

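    // prefixes scheme-less paths with ${nameNode} so Oozie resolves them against the
    // cluster's storage, e.g. /falcon/staging becomes ${nameNode}/falcon/staging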
    public static String getStoragePath(String path) {
        if (StringUtils.isNotEmpty(path)) {
            if (new Path(path).toUri().getScheme() == null && !path.startsWith("${nameNode}")) {
                path = "${nameNode}" + path;
            }
        }
        return path;
    }

    // default configuration for coordinator
    public static Properties createCoordDefaultConfiguration(String coordName, Entity entity)
            throws FalconException {

        Properties props = new Properties();
        props.put(WorkflowExecutionArgs.NOMINAL_TIME.getName(), NOMINAL_TIME_EL);
        props.put(WorkflowExecutionArgs.TIMESTAMP.getName(), ACTUAL_TIME_EL);
        props.put(OozieClient.EXTERNAL_ID, new ExternalId(entity.getName(),
                EntityUtil.getWorkflowNameTag(coordName, entity), "${coord:nominalTime()}").getId());
        props.put(WorkflowExecutionArgs.USER_JMS_NOTIFICATION_ENABLED.getName(), "true");
        props.put(WorkflowExecutionArgs.SYSTEM_JMS_NOTIFICATION_ENABLED.getName(),
                RuntimeProperties.get().getProperty("falcon.jms.notification.enabled", "true"));
        // properties provided in the entity override the defaults set above
        props.putAll(EntityUtil.getEntityProperties(entity));
        return props;
    }

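    // late data is recorded only when the entity declares late inputs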
    private static void addLateDataProperties(Properties props, Entity entity) throws FalconException {
        if (EntityUtil.getLateProcess(entity) == null || EntityUtil.getLateProcess(entity).getLateInputs() == null
                || EntityUtil.getLateProcess(entity).getLateInputs().size() == 0) {
            props.put("shouldRecord", "false");
        } else {
            props.put("shouldRecord", "true");
        }
    }

    private static void addBrokerProperties(Cluster cluster, Properties props) {
        props.put(WorkflowExecutionArgs.USER_BRKR_URL.getName(), ClusterHelper.getMessageBrokerUrl(cluster));
        props.put(WorkflowExecutionArgs.USER_BRKR_IMPL_CLASS.getName(),
                ClusterHelper.getMessageBrokerImplClass(cluster));

        String falconBrokerUrl = StartupProperties.get().getProperty("broker.url",
                "tcp://localhost:61616?daemon=true");
        props.put(WorkflowExecutionArgs.BRKR_URL.getName(), falconBrokerUrl);

        String falconBrokerImplClass = StartupProperties.get().getProperty("broker.impl.class",
                ClusterHelper.DEFAULT_BROKER_IMPL_CLASS);
        props.put(WorkflowExecutionArgs.BRKR_IMPL_CLASS.getName(), falconBrokerImplClass);

        String jmsMessageTTL = StartupProperties.get().getProperty("broker.ttlInMins",
                DEFAULT_BROKER_MSG_TTL.toString());
        props.put(WorkflowExecutionArgs.BRKR_TTL.getName(), jmsMessageTTL);
    }

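    // the workflow and coordinator schemas generate distinct CONFIGURATION types,
    // hence the two near-identical converters below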
    private static org.apache.falcon.oozie.workflow.CONFIGURATION getWorkflowConfig(Properties props) {
        org.apache.falcon.oozie.workflow.CONFIGURATION conf = new org.apache.falcon.oozie.workflow.CONFIGURATION();
        for (Map.Entry<Object, Object> prop : props.entrySet()) {
            org.apache.falcon.oozie.workflow.CONFIGURATION.Property confProp =
                    new org.apache.falcon.oozie.workflow.CONFIGURATION.Property();
            confProp.setName((String) prop.getKey());
            confProp.setValue((String) prop.getValue());
            conf.getProperty().add(confProp);
        }
        return conf;
    }

    public static CONFIGURATION getCoordinatorConfig(Properties props) {
        CONFIGURATION conf = new CONFIGURATION();
        for (Map.Entry<Object, Object> prop : props.entrySet()) {
            CONFIGURATION.Property confProp = new CONFIGURATION.Property();
            confProp.setName((String) prop.getKey());
            confProp.setValue((String) prop.getValue());
            conf.getProperty().add(confProp);
        }
        return conf;
    }
}
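
Example usage

The sketch below is not part of the class above; it is a minimal illustration of how a lifecycle builder might combine these utilities to assemble and stage a retention workflow. The action template path /action/feed/eviction-action.xml, the DELETE operation, the workflow name and the class itself are assumptions made for the example.

package org.apache.falcon.lifecycle.engine.oozie.utils;

import java.util.Properties;

import org.apache.falcon.FalconException;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.oozie.workflow.ACTION;
import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
import org.apache.falcon.workflow.WorkflowExecutionContext;
import org.apache.hadoop.fs.Path;

/** Hypothetical builder sketch: stages a retention workflow via OozieBuilderUtils. */
public final class RetentionWorkflowSketch {

    private RetentionWorkflowSketch() {
    }

    public static Path build(Cluster cluster, Entity feed, Path buildPath) throws FalconException {
        WORKFLOWAPP workflow = new WORKFLOWAPP();

        // hypothetical action template; real builders ship their own under /action/...
        ACTION eviction = OozieBuilderUtils.unmarshalAction("/action/feed/eviction-action.xml");
        eviction.setName("eviction");
        OozieBuilderUtils.decorateWithOozieRetries(eviction);
        // route success/failure of the main action to the post-processing actions
        OozieBuilderUtils.addTransition(eviction,
                OozieBuilderUtils.SUCCESS_POSTPROCESS_ACTION_NAME,
                OozieBuilderUtils.FAIL_POSTPROCESS_ACTION_NAME);
        workflow.getDecisionOrForkOrJoin().add(eviction);

        // post-processing actions terminate at the end/kill nodes added by decorateWorkflow()
        ACTION success = OozieBuilderUtils.getSuccessPostProcessAction();
        success.setName(OozieBuilderUtils.SUCCESS_POSTPROCESS_ACTION_NAME);
        OozieBuilderUtils.addTransition(success,
                OozieBuilderUtils.OK_ACTION_NAME, OozieBuilderUtils.FAIL_ACTION_NAME);
        workflow.getDecisionOrForkOrJoin().add(success);

        ACTION failure = OozieBuilderUtils.getFailPostProcessAction();
        OozieBuilderUtils.addTransition(failure,
                OozieBuilderUtils.FAIL_ACTION_NAME, OozieBuilderUtils.FAIL_ACTION_NAME);
        workflow.getDecisionOrForkOrJoin().add(failure);

        // names the workflow, wires the start node and adds the end/kill nodes
        OozieBuilderUtils.decorateWorkflow(workflow, "falcon-feed-retention", eviction.getName());

        // write workflow.xml and config-default.xml into the staging directory
        Path wfPath = OozieBuilderUtils.marshalWokflow(cluster, workflow, buildPath);
        Properties defaults = OozieBuilderUtils.createDefaultConfiguration(cluster, feed,
                WorkflowExecutionContext.EntityOperations.DELETE);
        OozieBuilderUtils.marshalDefaultConfig(cluster, workflow, defaults, buildPath);
        return wfPath;
    }
}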