Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.falcon.lifecycle.engine.oozie.utils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.falcon.FalconException; import org.apache.falcon.Tag; import org.apache.falcon.entity.ClusterHelper; import org.apache.falcon.entity.EntityUtil; import org.apache.falcon.entity.ExternalId; import org.apache.falcon.entity.HiveUtil; import org.apache.falcon.entity.v0.Entity; import org.apache.falcon.entity.v0.EntityType; import org.apache.falcon.entity.v0.cluster.Cluster; import org.apache.falcon.entity.v0.cluster.ClusterLocationType; import org.apache.falcon.entity.v0.cluster.Interfacetype; import org.apache.falcon.hadoop.HadoopClientFactory; import org.apache.falcon.oozie.coordinator.CONFIGURATION; import org.apache.falcon.oozie.coordinator.COORDINATORAPP; import org.apache.falcon.oozie.workflow.ACTION; import org.apache.falcon.oozie.workflow.CREDENTIAL; import org.apache.falcon.oozie.workflow.CREDENTIALS; import org.apache.falcon.oozie.workflow.END; import org.apache.falcon.oozie.workflow.KILL; import org.apache.falcon.oozie.workflow.START; import org.apache.falcon.oozie.workflow.WORKFLOWAPP; import org.apache.falcon.security.SecurityUtil; import org.apache.falcon.util.RuntimeProperties; import org.apache.falcon.util.StartupProperties; import org.apache.falcon.workflow.WorkflowExecutionArgs; import org.apache.falcon.workflow.WorkflowExecutionContext; import org.apache.falcon.workflow.engine.AbstractWorkflowEngine; import org.apache.falcon.workflow.util.OozieConstants; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.oozie.client.OozieClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; import javax.xml.bind.Marshaller; import javax.xml.bind.Unmarshaller; import javax.xml.namespace.QName; import javax.xml.transform.stream.StreamSource; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.StringWriter; import java.util.List; import java.util.Map; import java.util.Properties; /** * Utility class to build oozie artificats. */ public final class OozieBuilderUtils { private static final Logger LOG = LoggerFactory.getLogger(OozieBuilderUtils.class); private static final String POSTPROCESS_TEMPLATE = "/action/post-process.xml"; public static final String HIVE_CREDENTIAL_NAME = "falconHiveAuth"; public static final String MR_QUEUE_NAME = "queueName"; public static final String MR_JOB_PRIORITY = "jobPriority"; private static final String NOMINAL_TIME_EL = "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}"; private static final String ACTUAL_TIME_EL = "${coord:formatTime(coord:actualTime(), 'yyyy-MM-dd-HH-mm')}"; private static final Long DEFAULT_BROKER_MSG_TTL = 3 * 24 * 60L; private static final JAXBContext WORKFLOW_JAXB_CONTEXT; private static final JAXBContext ACTION_JAXB_CONTEXT; private static final JAXBContext COORD_JAXB_CONTEXT; private static final JAXBContext CONFIG_JAXB_CONTEXT; public static final String SUCCESS_POSTPROCESS_ACTION_NAME = "succeeded-post-processing"; public static final String FAIL_POSTPROCESS_ACTION_NAME = "failed-post-processing"; public static final String OK_ACTION_NAME = "end"; public static final String FAIL_ACTION_NAME = "fail"; public static final String ENTITY_PATH = "ENTITY_PATH"; public static final String ENTITY_NAME = "ENTITY_NAME"; public static final String IGNORE = "IGNORE"; static { try { WORKFLOW_JAXB_CONTEXT = JAXBContext.newInstance(WORKFLOWAPP.class); ACTION_JAXB_CONTEXT = JAXBContext.newInstance(org.apache.falcon.oozie.workflow.ACTION.class); COORD_JAXB_CONTEXT = JAXBContext.newInstance(COORDINATORAPP.class); CONFIG_JAXB_CONTEXT = JAXBContext.newInstance(org.apache.falcon.oozie.workflow.CONFIGURATION.class); } catch (JAXBException e) { throw new RuntimeException("Unable to create JAXB context", e); } } private OozieBuilderUtils() { } public static ACTION addTransition(ACTION action, String ok, String fail) { // XTODOS : why return when it is changing the same object? action.getOk().setTo(ok); action.getError().setTo(fail); return action; } public static void decorateWorkflow(WORKFLOWAPP wf, String name, String startAction) { wf.setName(name); wf.setStart(new START()); wf.getStart().setTo(startAction); wf.setEnd(new END()); wf.getEnd().setName(OK_ACTION_NAME); KILL kill = new KILL(); kill.setName(FAIL_ACTION_NAME); kill.setMessage("Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]"); wf.getDecisionOrForkOrJoin().add(kill); } public static ACTION getSuccessPostProcessAction() throws FalconException { ACTION action = unmarshalAction(POSTPROCESS_TEMPLATE); decorateWithOozieRetries(action); return action; } public static ACTION getFailPostProcessAction() throws FalconException { ACTION action = unmarshalAction(POSTPROCESS_TEMPLATE); decorateWithOozieRetries(action); action.setName(FAIL_POSTPROCESS_ACTION_NAME); return action; } private static Path marshal(Cluster cluster, JAXBElement<?> jaxbElement, JAXBContext jaxbContext, Path outPath) throws FalconException { try { Marshaller marshaller = jaxbContext.createMarshaller(); marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE); if (LOG.isDebugEnabled()) { StringWriter writer = new StringWriter(); marshaller.marshal(jaxbElement, writer); LOG.debug("Writing definition to {} on cluster {}", outPath, cluster.getName()); LOG.debug(writer.getBuffer().toString()); } FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(outPath.toUri(), ClusterHelper.getConfiguration(cluster)); OutputStream out = fs.create(outPath); try { marshaller.marshal(jaxbElement, out); } finally { out.close(); } LOG.info("Marshalled {} to {}", jaxbElement.getDeclaredType(), outPath); return outPath; } catch (Exception e) { throw new FalconException("Unable to marshall app object", e); } } public static Path marshalCoordinator(Cluster cluster, COORDINATORAPP coord, Path outPath) throws FalconException { return marshal(cluster, new org.apache.falcon.oozie.coordinator.ObjectFactory().createCoordinatorApp(coord), COORD_JAXB_CONTEXT, new Path(outPath, "coordinator.xml")); } public static Path marshalDefaultConfig(Cluster cluster, WORKFLOWAPP workflowapp, Properties properties, Path outPath) throws FalconException { QName workflowQName = new org.apache.falcon.oozie.workflow.ObjectFactory().createWorkflowApp(workflowapp) .getName(); org.apache.falcon.oozie.workflow.CONFIGURATION config = getWorkflowConfig(properties); JAXBElement<org.apache.falcon.oozie.workflow.CONFIGURATION> configJaxbElement = new JAXBElement( new QName(workflowQName.getNamespaceURI(), "configuration", workflowQName.getPrefix()), org.apache.falcon.oozie.workflow.CONFIGURATION.class, config); Path defaultConfigPath = new Path(outPath, "config-default.xml"); return marshal(cluster, configJaxbElement, CONFIG_JAXB_CONTEXT, defaultConfigPath); } public static Path marshalWokflow(Cluster cluster, WORKFLOWAPP workflow, Path outPath) throws FalconException { return marshal(cluster, new org.apache.falcon.oozie.workflow.ObjectFactory().createWorkflowApp(workflow), WORKFLOW_JAXB_CONTEXT, new Path(outPath, "workflow.xml")); } public static <T> T unmarshal(String template, JAXBContext context, Class<T> cls) throws FalconException { InputStream resourceAsStream = null; try { resourceAsStream = OozieBuilderUtils.class.getResourceAsStream(template); Unmarshaller unmarshaller = context.createUnmarshaller(); JAXBElement<T> jaxbElement = unmarshaller.unmarshal(new StreamSource(resourceAsStream), cls); return jaxbElement.getValue(); } catch (JAXBException e) { throw new FalconException("Failed to unmarshal " + template, e); } finally { IOUtils.closeQuietly(resourceAsStream); } } public static ACTION unmarshalAction(String template) throws FalconException { return unmarshal(template, ACTION_JAXB_CONTEXT, ACTION.class); } // XTODOS Should we make them more specific to feeds?? public static void addLibExtensionsToWorkflow(Cluster cluster, WORKFLOWAPP wf, Tag tag, EntityType type) throws FalconException { String libext = ClusterHelper.getLocation(cluster, ClusterLocationType.WORKING).getPath() + "/libext"; FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(ClusterHelper.getConfiguration(cluster)); try { addExtensionJars(fs, new Path(libext), wf); addExtensionJars(fs, new Path(libext, type.name()), wf); if (tag != null) { addExtensionJars(fs, new Path(libext, type.name() + "/" + tag.name().toLowerCase()), wf); } } catch (IOException e) { throw new FalconException(e); } } /** * * @param path * @param name * @return */ public static Properties getProperties(Path path, String name) { if (path == null) { return null; } Properties prop = new Properties(); prop.setProperty(ENTITY_PATH, path.toString()); prop.setProperty(ENTITY_NAME, name); return prop; } /** * Adds path(will be the list of directories containing jars to be added as external jars to workflow e.g. * for feeds libext, libext/FEED/, libext/FEED/RETENTION, libext/FEED/REPLICATION as an extension jar to the * workflow. e.g. * * @param fs * @param path * @param wf * @throws IOException */ public static void addExtensionJars(FileSystem fs, Path path, WORKFLOWAPP wf) throws IOException { FileStatus[] libs; try { libs = fs.listStatus(path); } catch (FileNotFoundException ignore) { //Ok if the libext is not configured return; } for (FileStatus lib : libs) { if (lib.isDirectory()) { continue; } for (Object obj : wf.getDecisionOrForkOrJoin()) { if (!(obj instanceof ACTION)) { continue; } ACTION action = (ACTION) obj; List<String> files = null; if (action.getJava() != null) { files = action.getJava().getFile(); } else if (action.getPig() != null) { files = action.getPig().getFile(); } else if (action.getMapReduce() != null) { files = action.getMapReduce().getFile(); } if (files != null) { files.add(lib.getPath().toString()); } } } } public static void decorateWithOozieRetries(ACTION action) { Properties props = RuntimeProperties.get(); action.setRetryMax(props.getProperty("falcon.parentworkflow.retry.max", "3")); action.setRetryInterval(props.getProperty("falcon.parentworkflow.retry.interval.secs", "1")); } // creates the default configuration which is written in config-default.xml public static Properties createDefaultConfiguration(Cluster cluster, Entity entity, WorkflowExecutionContext.EntityOperations operation) throws FalconException { Properties props = new Properties(); props.put(WorkflowExecutionArgs.ENTITY_NAME.getName(), entity.getName()); props.put(WorkflowExecutionArgs.ENTITY_TYPE.getName(), entity.getEntityType().name()); props.put(WorkflowExecutionArgs.CLUSTER_NAME.getName(), cluster.getName()); props.put(WorkflowExecutionArgs.DATASOURCE_NAME.getName(), "NA"); props.put("falconDataOperation", operation.name()); props.put(WorkflowExecutionArgs.LOG_DIR.getName(), getStoragePath(EntityUtil.getLogPath(cluster, entity))); props.put(WorkflowExecutionArgs.WF_ENGINE_URL.getName(), ClusterHelper.getOozieUrl(cluster)); addLateDataProperties(props, entity); addBrokerProperties(cluster, props); props.put(MR_QUEUE_NAME, "default"); props.put(MR_JOB_PRIORITY, "NORMAL"); //properties provided in entity override the default generated properties props.putAll(EntityUtil.getEntityProperties(entity)); props.putAll(createAppProperties(cluster)); return props; } // gets the cluster specific properties to be populated in config-default.xml private static Properties createAppProperties(Cluster cluster) throws FalconException { Properties properties = EntityUtil.getEntityProperties(cluster); properties.setProperty(AbstractWorkflowEngine.NAME_NODE, ClusterHelper.getStorageUrl(cluster)); properties.setProperty(AbstractWorkflowEngine.JOB_TRACKER, ClusterHelper.getMREndPoint(cluster)); properties.setProperty("colo.name", cluster.getColo()); final String endpoint = ClusterHelper.getInterface(cluster, Interfacetype.WORKFLOW).getEndpoint(); if (!OozieConstants.LOCAL_OOZIE.equals(endpoint)) { properties.setProperty(OozieClient.USE_SYSTEM_LIBPATH, "true"); } properties.setProperty("falcon.libpath", ClusterHelper.getLocation(cluster, ClusterLocationType.WORKING).getPath() + "/lib"); return properties; } // creates hive-site.xml configuration in conf dir for the given cluster on the same cluster. public static void createHiveConfiguration(Cluster cluster, Path workflowPath, String prefix) throws FalconException { Configuration hiveConf = getHiveCredentialsAsConf(cluster); try { Configuration conf = ClusterHelper.getConfiguration(cluster); FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(conf); // create hive conf to stagingDir Path confPath = new Path(workflowPath + "/conf"); persistHiveConfiguration(fs, confPath, hiveConf, prefix); } catch (IOException e) { throw new FalconException("Unable to create create hive site", e); } } private static void persistHiveConfiguration(FileSystem fs, Path confPath, Configuration hiveConf, String prefix) throws IOException { OutputStream out = null; try { out = fs.create(new Path(confPath, prefix + "hive-site.xml")); hiveConf.writeXml(out); } finally { IOUtils.closeQuietly(out); } } /** * This is only necessary if table is involved and is secure mode. * * @param workflowApp workflow xml * @param cluster cluster entity */ public static void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster, String credentialName) { CREDENTIALS credentials = workflowApp.getCredentials(); if (credentials == null) { credentials = new CREDENTIALS(); } credentials.getCredential().add(createHCatalogCredential(cluster, credentialName)); // add credential for workflow workflowApp.setCredentials(credentials); } /** * This is only necessary if table is involved and is secure mode. * * @param cluster cluster entity * @param credentialName credential name * @return CREDENTIALS object */ public static CREDENTIAL createHCatalogCredential(Cluster cluster, String credentialName) { final String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster); CREDENTIAL credential = new CREDENTIAL(); credential.setName(credentialName); credential.setType("hcat"); credential.getProperty().add(createProperty(HiveUtil.METASTROE_URI, metaStoreUrl)); credential.getProperty().add(createProperty(SecurityUtil.METASTORE_PRINCIPAL, ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_KERBEROS_PRINCIPAL))); return credential; } public static CREDENTIAL.Property createProperty(String name, String value) { CREDENTIAL.Property property = new CREDENTIAL.Property(); property.setName(name); property.setValue(value); return property; } private static Properties getHiveCredentials(Cluster cluster) { String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster); if (metaStoreUrl == null) { throw new IllegalStateException("Registry interface is not defined in cluster: " + cluster.getName()); } Properties hiveCredentials = new Properties(); hiveCredentials.put(HiveUtil.METASTOREURIS, metaStoreUrl); hiveCredentials.put(HiveUtil.METASTORE_UGI, "true"); hiveCredentials.put(HiveUtil.NODE, metaStoreUrl.replace("thrift", "hcat")); hiveCredentials.put(HiveUtil.METASTROE_URI, metaStoreUrl); if (SecurityUtil.isSecurityEnabled()) { String principal = ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_KERBEROS_PRINCIPAL); hiveCredentials.put(SecurityUtil.METASTORE_PRINCIPAL, principal); hiveCredentials.put(SecurityUtil.HIVE_METASTORE_KERBEROS_PRINCIPAL, principal); hiveCredentials.put(SecurityUtil.METASTORE_USE_THRIFT_SASL, "true"); } return hiveCredentials; } private static Configuration getHiveCredentialsAsConf(Cluster cluster) { Properties hiveCredentials = getHiveCredentials(cluster); Configuration hiveConf = new Configuration(false); for (Map.Entry<Object, Object> entry : hiveCredentials.entrySet()) { hiveConf.set((String) entry.getKey(), (String) entry.getValue()); } return hiveConf; } public static Path getBuildPath(Path buildPath, Tag tag) { return new Path(buildPath, tag.name()); } protected static String getStoragePath(Path path) { if (path != null) { return getStoragePath(path.toString()); } return null; } public static String getStoragePath(String path) { if (StringUtils.isNotEmpty(path)) { if (new Path(path).toUri().getScheme() == null && !path.startsWith("${nameNode}")) { path = "${nameNode}" + path; } } return path; } // default configuration for coordinator public static Properties createCoordDefaultConfiguration(String coordName, Entity entity) throws FalconException { Properties props = new Properties(); props.put(WorkflowExecutionArgs.NOMINAL_TIME.getName(), NOMINAL_TIME_EL); props.put(WorkflowExecutionArgs.TIMESTAMP.getName(), ACTUAL_TIME_EL); props.put(OozieClient.EXTERNAL_ID, new ExternalId(entity.getName(), EntityUtil.getWorkflowNameTag(coordName, entity), "${coord:nominalTime()}").getId()); props.put(WorkflowExecutionArgs.USER_JMS_NOTIFICATION_ENABLED.getName(), "true"); props.put(WorkflowExecutionArgs.SYSTEM_JMS_NOTIFICATION_ENABLED.getName(), RuntimeProperties.get().getProperty("falcon.jms.notification.enabled", "true")); //props in entity override the set props. props.putAll(EntityUtil.getEntityProperties(entity)); return props; } private static void addLateDataProperties(Properties props, Entity entity) throws FalconException { if (EntityUtil.getLateProcess(entity) == null || EntityUtil.getLateProcess(entity).getLateInputs() == null || EntityUtil.getLateProcess(entity).getLateInputs().size() == 0) { props.put("shouldRecord", "false"); } else { props.put("shouldRecord", "true"); } } private static void addBrokerProperties(Cluster cluster, Properties props) { props.put(WorkflowExecutionArgs.USER_BRKR_URL.getName(), ClusterHelper.getMessageBrokerUrl(cluster)); props.put(WorkflowExecutionArgs.USER_BRKR_IMPL_CLASS.getName(), ClusterHelper.getMessageBrokerImplClass(cluster)); String falconBrokerUrl = StartupProperties.get().getProperty("broker.url", "tcp://localhost:61616?daemon=true"); props.put(WorkflowExecutionArgs.BRKR_URL.getName(), falconBrokerUrl); String falconBrokerImplClass = StartupProperties.get().getProperty("broker.impl.class", ClusterHelper.DEFAULT_BROKER_IMPL_CLASS); props.put(WorkflowExecutionArgs.BRKR_IMPL_CLASS.getName(), falconBrokerImplClass); String jmsMessageTTL = StartupProperties.get().getProperty("broker.ttlInMins", DEFAULT_BROKER_MSG_TTL.toString()); props.put(WorkflowExecutionArgs.BRKR_TTL.getName(), jmsMessageTTL); } private static org.apache.falcon.oozie.workflow.CONFIGURATION getWorkflowConfig(Properties props) { org.apache.falcon.oozie.workflow.CONFIGURATION conf = new org.apache.falcon.oozie.workflow.CONFIGURATION(); for (Map.Entry<Object, Object> prop : props.entrySet()) { org.apache.falcon.oozie.workflow.CONFIGURATION.Property confProp = new org.apache.falcon.oozie.workflow.CONFIGURATION.Property(); confProp.setName((String) prop.getKey()); confProp.setValue((String) prop.getValue()); conf.getProperty().add(confProp); } return conf; } public static CONFIGURATION getCoordinatorConfig(Properties props) { CONFIGURATION conf = new CONFIGURATION(); for (Map.Entry<Object, Object> prop : props.entrySet()) { CONFIGURATION.Property confProp = new CONFIGURATION.Property(); confProp.setName((String) prop.getKey()); confProp.setValue((String) prop.getValue()); conf.getProperty().add(confProp); } return conf; } }