org.apache.falcon.oozie.OozieEntityBuilder.java Source code

Introduction

Here is the source code for org.apache.falcon.oozie.OozieEntityBuilder.java, the abstract base class that Apache Falcon uses to build the Oozie artifacts (workflows, coordinators and bundles) for its feed and process entities.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.oozie;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.entity.CatalogStorage;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.cluster.ClusterLocationType;
import org.apache.falcon.entity.v0.cluster.Interfacetype;
import org.apache.falcon.entity.v0.cluster.Property;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.oozie.feed.FeedBundleBuilder;
import org.apache.falcon.oozie.process.ProcessBundleBuilder;
import org.apache.falcon.security.SecurityUtil;
import org.apache.falcon.service.FalconPathFilter;
import org.apache.falcon.service.SharedLibraryHostingService;
import org.apache.falcon.util.RuntimeProperties;
import org.apache.falcon.util.StartupProperties;
import org.apache.falcon.workflow.engine.AbstractWorkflowEngine;
import org.apache.falcon.workflow.util.OozieConstants;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.oozie.client.OozieClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import javax.xml.transform.stream.StreamSource;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;

/**
 * Base class for building Oozie entities - workflow, coordinator and bundle.
 *
 * @param <T> the Falcon entity type (Feed or Process) that this builder handles
 */
public abstract class OozieEntityBuilder<T extends Entity> {
    public static final Logger LOG = LoggerFactory.getLogger(OozieEntityBuilder.class);

    public static final String METASTOREURIS = "hive.metastore.uris";
    public static final String METASTORE_KERBEROS_PRINCIPAL = "hive.metastore.kerberos.principal";
    public static final String METASTORE_USE_THRIFT_SASL = "hive.metastore.sasl.enabled";

    public static final String ENTITY_PATH = "ENTITY_PATH";
    public static final String ENTITY_NAME = "ENTITY_NAME";
    protected static final String IGNORE = "IGNORE";
    protected static final String NONE = "NONE";

    private static final String WORKFLOW_PATH_SIZE_LIMIT = "workflow.path.size.limit";

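    /** Restricts shared-lib hosting to jars whose file names start with "falcon". */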
    private static final FalconPathFilter FALCON_JAR_FILTER = new FalconPathFilter() {
        @Override
        public boolean accept(Path path) {
            String fileName = path.getName();
            return fileName.startsWith("falcon");
        }

        @Override
        public String getJarName(Path path) {
            String name = path.getName();
            if (name.endsWith(".jar")) {
                name = name.substring(0, name.indexOf(".jar"));
            }
            return name;
        }
    };

    protected T entity;
    protected final boolean isSecurityEnabled = SecurityUtil.isSecurityEnabled();

    public OozieEntityBuilder(T entity) {
        this.entity = entity;
    }

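    /**
     * Builds the Oozie definitions (workflow/coordinator/bundle) for this
     * entity on the given cluster, writing them under the build path.
     *
     * @param cluster   cluster to build against
     * @param buildPath path under which the definitions are materialized
     * @return properties describing the built application
     * @throws FalconException if the definitions cannot be built
     */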
    public abstract Properties build(Cluster cluster, Path buildPath) throws FalconException;

    protected String getStoragePath(Path path) {
        if (path != null) {
            return getStoragePath(path.toString());
        }
        return null;
    }

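    /**
     * Qualifies a scheme-less path with the ${nameNode} EL variable so that
     * Oozie resolves it against the cluster's name node at runtime.
     */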
    protected String getStoragePath(String path) {
        if (StringUtils.isNotEmpty(path)) {
            if (new Path(path).toUri().getScheme() == null && !path.startsWith("${nameNode}")) {
                path = "${nameNode}" + path;
            }
        }
        return path;
    }

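    /**
     * Factory method that returns the bundle builder matching the entity
     * type: feeds get a FeedBundleBuilder, processes a ProcessBundleBuilder.
     */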
    public static OozieEntityBuilder get(Entity entity) {
        switch (entity.getEntityType()) {
        case FEED:
            return new FeedBundleBuilder((Feed) entity);

        case PROCESS:
            return new ProcessBundleBuilder((Process) entity);

        default:
        }
        throw new IllegalArgumentException("Unhandled type: " + entity.getEntityType());
    }

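    /**
     * Serializes the given JAXB element to the output path on the cluster's
     * file system, first verifying that the path length stays within the
     * workflow engine's limit.
     */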
    protected Path marshal(Cluster cluster, JAXBElement<?> jaxbElement, JAXBContext jaxbContext, Path outPath)
            throws FalconException {
        FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(outPath.toUri(),
                ClusterHelper.getConfiguration(cluster));
        verifyOozieEntityPath(fs, outPath, jaxbElement.getDeclaredType());
        try {
            Marshaller marshaller = jaxbContext.createMarshaller();
            marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);

            if (LOG.isDebugEnabled()) {
                StringWriter writer = new StringWriter();
                marshaller.marshal(jaxbElement, writer);
                LOG.debug("Writing definition to {} on cluster {}", outPath, cluster.getName());
                LOG.debug(writer.getBuffer().toString());
            }

            OutputStream out = fs.create(outPath);

            try {
                marshaller.marshal(jaxbElement, out);
            } finally {
                out.close();
            }

            LOG.info("Marshalled {} to {}", jaxbElement.getDeclaredType(), outPath);
            return outPath;
        } catch (Exception e) {
            throw new FalconException("Unable to marshall app object", e);
        }
    }

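    /**
     * Assembles the application properties (name node, job tracker, colo,
     * Falcon lib path, and the system libpath flag for non-local Oozie) that
     * accompany the generated definitions.
     */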
    protected Properties createAppProperties(Cluster cluster, String wfName) throws FalconException {
        Properties properties = getEntityProperties(cluster);
        properties.setProperty(AbstractWorkflowEngine.NAME_NODE, ClusterHelper.getStorageUrl(cluster));
        properties.setProperty(AbstractWorkflowEngine.JOB_TRACKER, ClusterHelper.getMREndPoint(cluster));
        properties.setProperty("colo.name", cluster.getColo());
        final String endpoint = ClusterHelper.getInterface(cluster, Interfacetype.WORKFLOW).getEndpoint();
        if (!OozieConstants.LOCAL_OOZIE.equals(endpoint)) {
            properties.setProperty(OozieClient.USE_SYSTEM_LIBPATH, "true");
        }
        properties.setProperty("falcon.libpath",
                ClusterHelper.getLocation(cluster, ClusterLocationType.WORKING).getPath() + "/lib");

        return properties;
    }

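    /**
     * Builds Hive metastore connection properties from the cluster's registry
     * interface, adding Kerberos settings when security is enabled.
     */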
    protected Properties getHiveCredentials(Cluster cluster) {
        String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);
        if (metaStoreUrl == null) {
            throw new IllegalStateException("Registry interface is not defined in cluster: " + cluster.getName());
        }

        Properties hiveCredentials = new Properties();
        hiveCredentials.put(METASTOREURIS, metaStoreUrl);
        hiveCredentials.put("hive.metastore.execute.setugi", "true");
        hiveCredentials.put("hcatNode", metaStoreUrl.replace("thrift", "hcat"));
        hiveCredentials.put("hcat.metastore.uri", metaStoreUrl);

        if (isSecurityEnabled) {
            String principal = ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_PRINCIPAL);
            hiveCredentials.put(METASTORE_KERBEROS_PRINCIPAL, principal);
            hiveCredentials.put(METASTORE_USE_THRIFT_SASL, "true");
            hiveCredentials.put("hcat.metastore.principal", principal);
        }

        return hiveCredentials;
    }

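    /** Exposes the same Hive credentials as a Hadoop Configuration object. */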
    protected Configuration getHiveCredentialsAsConf(Cluster cluster) {
        Properties hiveCredentials = getHiveCredentials(cluster);

        Configuration hiveConf = new Configuration(false);
        for (Entry<Object, Object> entry : hiveCredentials.entrySet()) {
            hiveConf.set((String) entry.getKey(), (String) entry.getValue());
        }

        return hiveConf;
    }

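    /**
     * Copies the custom properties declared on a cluster, feed or process
     * entity into a flat java.util.Properties instance.
     */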
    protected Properties getEntityProperties(Entity myEntity) {
        Properties properties = new Properties();
        switch (myEntity.getEntityType()) {
        case CLUSTER:
            org.apache.falcon.entity.v0.cluster.Properties clusterProps = ((Cluster) myEntity).getProperties();
            if (clusterProps != null) {
                for (Property prop : clusterProps.getProperties()) {
                    properties.put(prop.getName(), prop.getValue());
                }
            }
            break;

        case FEED:
            org.apache.falcon.entity.v0.feed.Properties feedProps = ((Feed) myEntity).getProperties();
            if (feedProps != null) {
                for (org.apache.falcon.entity.v0.feed.Property prop : feedProps.getProperties()) {
                    properties.put(prop.getName(), prop.getValue());
                }
            }
            break;

        case PROCESS:
            org.apache.falcon.entity.v0.process.Properties processProps = ((Process) myEntity).getProperties();
            if (processProps != null) {
                for (org.apache.falcon.entity.v0.process.Property prop : processProps.getProperties()) {
                    properties.put(prop.getName(), prop.getValue());
                }
            }
            break;

        default:
            throw new IllegalArgumentException("Unhandled entity type " + myEntity.getEntityType());
        }
        return properties;
    }

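    /**
     * Exposes an output's catalog table details as workflow properties,
     * formatting the partition expressions separately for Pig/Java actions
     * and for Hive (which needs quoted partition values).
     */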
    protected void propagateCatalogTableProperties(Output output, CatalogStorage tableStorage, Properties props) {
        String prefix = "falcon_" + output.getName();

        propagateCommonCatalogTableProperties(tableStorage, props, prefix);

        //pig and java actions require partition expression as "key1=val1, key2=val2"
        props.put(prefix + "_partitions_pig", "${coord:dataOutPartitions('" + output.getName() + "')}");
        props.put(prefix + "_partitions_java", "${coord:dataOutPartitions('" + output.getName() + "')}");

        //hive requires partition expression as "key1='val1', key2='val2'" (with quotes around values)
        //there is no direct EL expression in oozie
        List<String> partitions = new ArrayList<String>();
        for (String key : tableStorage.getDatedPartitionKeys()) {
            StringBuilder expr = new StringBuilder();
            expr.append("${coord:dataOutPartitionValue('").append(output.getName()).append("', '").append(key)
                    .append("')}");
            props.put(prefix + "_dated_partition_value_" + key, expr.toString());
            partitions.add(key + "='" + expr + "'");
        }
        props.put(prefix + "_partitions_hive", StringUtils.join(partitions, ","));
    }

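    /** Adds storage type, catalog URL, database and table under the given prefix. */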
    protected void propagateCommonCatalogTableProperties(CatalogStorage tableStorage, Properties props,
            String prefix) {
        props.put(prefix + "_storage_type", tableStorage.getType().name());
        props.put(prefix + "_catalog_url", tableStorage.getCatalogUrl());
        props.put(prefix + "_database", tableStorage.getDatabase());
        props.put(prefix + "_table", tableStorage.getTable());
    }

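    /**
     * Pushes the falcon-* jars from the configured system lib location to the
     * given lib path on the cluster's file system.
     */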
    protected void copySharedLibs(Cluster cluster, Path libPath) throws FalconException {
        try {
            FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(libPath.toUri(),
                    ClusterHelper.getConfiguration(cluster));
            SharedLibraryHostingService.pushLibsToHDFS(fs,
                    StartupProperties.get().getProperty("system.lib.location"), libPath, FALCON_JAR_FILTER);
        } catch (IOException e) {
            throw new FalconException("Failed to copy shared libs on cluster " + cluster.getName(), e);
        }
    }

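    /** Wraps the entity path and name into the standard result properties. */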
    protected Properties getProperties(Path path, String name) {
        if (path == null) {
            return null;
        }

        Properties prop = new Properties();
        prop.setProperty(OozieEntityBuilder.ENTITY_PATH, path.toString());
        prop.setProperty(OozieEntityBuilder.ENTITY_NAME, name);
        return prop;
    }

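    /**
     * Loads the given classpath template and unmarshals it into an instance
     * of the requested JAXB type.
     */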
    protected <T> T unmarshal(String template, JAXBContext context, Class<T> cls) throws FalconException {
        InputStream resourceAsStream = null;
        try {
            resourceAsStream = OozieEntityBuilder.class.getResourceAsStream(template);
            Unmarshaller unmarshaller = context.createUnmarshaller();
            JAXBElement<T> jaxbElement = unmarshaller.unmarshal(new StreamSource(resourceAsStream), cls);
            return jaxbElement.getValue();
        } catch (JAXBException e) {
            throw new FalconException("Failed to unmarshal " + template, e);
        } finally {
            IOUtils.closeQuietly(resourceAsStream);
        }
    }

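    /**
     * Fails fast if the fully qualified app path would exceed the configured
     * workflow.path.size.limit, the maximum length allowed by the workflow
     * engine.
     */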
    private void verifyOozieEntityPath(FileSystem fs, Path path, Class type) throws FalconException {
        int oozieIdSizeLimit;
        String fullPath = fs.getUri().toString() + "/" + path.toUri().toString();
        try {
            String oozieIdSizeLimitString = RuntimeProperties.get().getProperty(WORKFLOW_PATH_SIZE_LIMIT, "255");
            oozieIdSizeLimit = Integer.parseInt(oozieIdSizeLimitString);
            int pathLength = fullPath.length();
            if (pathLength > oozieIdSizeLimit) {
                throw new FalconException("Length of app-path " + "\"" + fullPath + "\" of type " + type + " is "
                        + pathLength + ". This exceeds limit allowed by workflow engine " + oozieIdSizeLimit);
            }
        } catch (NumberFormatException ne) {
            throw new FalconException("Invalid value provided for runtime property \"" + WORKFLOW_PATH_SIZE_LIMIT
                    + "\". Please provide an integer value.", ne);
        }
    }
}
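
To show how this builder is typically driven, here is a minimal, hypothetical usage sketch. The OozieEntityBuilderExample class, its buildAppDefinitions method, and the staging path are illustrative assumptions, not part of Falcon; the entity and cluster objects are assumed to have been loaded elsewhere (for example from Falcon's config store).

import org.apache.falcon.FalconException;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.oozie.OozieEntityBuilder;
import org.apache.hadoop.fs.Path;

import java.util.Properties;

/** Hypothetical caller that materializes Oozie definitions for one entity/cluster. */
public final class OozieEntityBuilderExample {

    private OozieEntityBuilderExample() {}

    public static String buildAppDefinitions(Entity entity, Cluster cluster, Path buildPath)
            throws FalconException {
        // get() dispatches on entity type: FeedBundleBuilder for feeds,
        // ProcessBundleBuilder for processes.
        OozieEntityBuilder builder = OozieEntityBuilder.get(entity);

        // build() writes the workflow/coordinator/bundle definitions under
        // buildPath and returns properties describing the built application.
        Properties result = builder.build(cluster, buildPath);

        // Builders that produce an application return the standard properties
        // assembled by getProperties(Path, String) above.
        return result == null ? null : result.getProperty(OozieEntityBuilder.ENTITY_PATH);
    }
}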