Java tutorial: Apache Falcon's OozieWorkflowBuilder

This walkthrough reads through org.apache.falcon.workflow.OozieWorkflowBuilder, the abstract base class Falcon uses to turn entity definitions into Oozie bundles, coordinators and workflows on a cluster.
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.falcon.workflow;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.FalconRuntimException;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.ExternalId;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.cluster.Property;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.messaging.EntityInstanceMessage.ARG;
import org.apache.falcon.oozie.bundle.BUNDLEAPP;
import org.apache.falcon.oozie.bundle.COORDINATOR;
import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
import org.apache.falcon.oozie.coordinator.ObjectFactory;
import org.apache.falcon.oozie.workflow.ACTION;
import org.apache.falcon.oozie.workflow.CREDENTIAL;
import org.apache.falcon.oozie.workflow.CREDENTIALS;
import org.apache.falcon.oozie.workflow.WORKFLOWAPP;
import org.apache.falcon.security.SecurityUtil;
import org.apache.falcon.service.FalconPathFilter;
import org.apache.falcon.service.SharedLibraryHostingService;
import org.apache.falcon.util.OozieUtils;
import org.apache.falcon.util.RuntimeProperties;
import org.apache.falcon.util.StartupProperties;
import org.apache.falcon.workflow.engine.OozieWorkflowEngine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.oozie.client.OozieClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;

/**
 * Base workflow builder for falcon entities.
 * @param <T> the entity type
 */
public abstract class OozieWorkflowBuilder<T extends Entity> extends WorkflowBuilder<T> {

    private static final Logger LOG = LoggerFactory.getLogger(OozieWorkflowBuilder.class);

    protected static final ConfigurationStore CONFIG_STORE = ConfigurationStore.get();

    protected static final String NOMINAL_TIME_EL = "${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd-HH-mm')}";
    protected static final String ACTUAL_TIME_EL = "${coord:formatTime(coord:actualTime(), 'yyyy-MM-dd-HH-mm')}";

    protected static final Long DEFAULT_BROKER_MSG_TTL = 3 * 24 * 60L;
    protected static final String MR_QUEUE_NAME = "queueName";
    protected static final String MR_JOB_PRIORITY = "jobPriority";

    protected static final String HIVE_CREDENTIAL_NAME = "falconHiveAuth";
    public static final String METASTOREURIS = "hive.metastore.uris";
    public static final String METASTORE_KERBEROS_PRINCIPAL = "hive.metastore.kerberos.principal";
    public static final String METASTORE_USE_THRIFT_SASL = "hive.metastore.sasl.enabled";

    protected static final String IGNORE = "IGNORE";

    public static final Set<String> FALCON_ACTIONS = new HashSet<String>(
            Arrays.asList("recordsize", "succeeded-post-processing", "failed-post-processing"));

    protected static final FalconPathFilter FALCON_JAR_FILTER = new FalconPathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("falcon");
        }

        @Override
        public String getJarName(Path path) {
            String name = path.getName();
            if (name.endsWith(".jar")) {
                name = name.substring(0, name.indexOf(".jar"));
            }
            return name;
        }
    };

    protected final boolean isSecurityEnabled;

    protected OozieWorkflowBuilder(T entity) {
        super(entity);
        isSecurityEnabled = SecurityUtil.isSecurityEnabled();
    }

    protected Path getCoordPath(Path bundlePath, String coordName) {
        Tag tag = EntityUtil.getWorkflowNameTag(coordName, entity);
        return new Path(bundlePath, tag.name());
    }

    protected abstract Map<String, String> getEntityProperties();

    public boolean map(Cluster cluster, Path bundlePath) throws FalconException {
        BUNDLEAPP bundleApp = new BUNDLEAPP();
        bundleApp.setName(EntityUtil.getWorkflowName(entity).toString());

        // all properties are set prior to bundle and coordinator creation
        List<COORDINATORAPP> coordinators = getCoordinators(cluster, bundlePath);
        if (coordinators.size() == 0) {
            return false;
        }

        for (COORDINATORAPP coordinatorapp : coordinators) {
            Path coordPath = getCoordPath(bundlePath, coordinatorapp.getName());
            String coordXmlName = marshal(cluster, coordinatorapp, coordPath,
                    EntityUtil.getWorkflowNameSuffix(coordinatorapp.getName(), entity));

            // create the logs dir
            createLogsDir(cluster, coordPath);

            // copy falcon libs to the workflow dir
            copySharedLibs(cluster, coordinatorapp);

            // add the coordinator to the bundle
            COORDINATOR bundleCoord = new COORDINATOR();
            bundleCoord.setName(coordinatorapp.getName());
            bundleCoord.setAppPath(getStoragePath(coordPath) + "/" + coordXmlName);
            bundleApp.getCoordinator().add(bundleCoord);
        }

        // write the bundle
        marshal(cluster, bundleApp, bundlePath);
        return true;
    }
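An aside to make the abstract contract concrete before moving on: map() leans on getCoordinators() and the other abstract hooks, so a concrete builder only has to fill those in. The sketch below is purely hypothetical; Falcon's real builders live in the process and feed modules, the template resource name is invented, and any additional hooks inherited from WorkflowBuilder are glossed over here.

    // Hypothetical sketch, not shipped with Falcon; assumes the imports of this file
    // plus org.apache.falcon.entity.v0.process.Process.
    public class MinimalProcessBuilder extends OozieWorkflowBuilder<org.apache.falcon.entity.v0.process.Process> {

        public MinimalProcessBuilder(org.apache.falcon.entity.v0.process.Process entity) {
            super(entity);
        }

        @Override
        protected Map<String, String> getEntityProperties() {
            return new HashMap<String, String>();       // no entity-level overrides in this sketch
        }

        @Override
        public List<COORDINATORAPP> getCoordinators(Cluster cluster, Path bundlePath) throws FalconException {
            // load an invented, bundled template and give it the canonical workflow name
            COORDINATORAPP coord = getCoordinatorTemplate("/minimal-coordinator.xml");
            coord.setName(EntityUtil.getWorkflowName(entity).toString());
            return Arrays.asList(coord);
        }

        @Override
        protected boolean shouldSetupHiveConfiguration(Cluster cluster,
                org.apache.falcon.entity.v0.process.Process process) {
            return false;                               // no table storage in this sketch
        }

        @Override
        public Date getNextStartTime(org.apache.falcon.entity.v0.process.Process process,
                String clusterName, Date now) {
            return now;                                 // real builders derive this from the entity schedule
        }
    }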
    private void addExtensionJars(FileSystem fs, Path path, WORKFLOWAPP wf) throws IOException {
        FileStatus[] libs = null;
        try {
            libs = fs.listStatus(path);
        } catch (FileNotFoundException ignore) {
            // ok if libext is not configured
        }

        if (libs == null) {
            return;
        }

        for (FileStatus lib : libs) {
            if (lib.isDir()) {
                continue;
            }

            for (Object obj : wf.getDecisionOrForkOrJoin()) {
                if (!(obj instanceof ACTION)) {
                    continue;
                }

                ACTION action = (ACTION) obj;
                List<String> files = null;
                if (action.getJava() != null) {
                    files = action.getJava().getFile();
                } else if (action.getPig() != null) {
                    files = action.getPig().getFile();
                } else if (action.getMapReduce() != null) {
                    files = action.getMapReduce().getFile();
                }
                if (files != null) {
                    files.add(lib.getPath().toString());
                }
            }
        }
    }

    protected void addLibExtensionsToWorkflow(Cluster cluster, WORKFLOWAPP wf, EntityType type, String lifecycle)
        throws IOException, FalconException {
        String libext = ClusterHelper.getLocation(cluster, "working") + "/libext";
        FileSystem fs = HadoopClientFactory.get().createFileSystem(ClusterHelper.getConfiguration(cluster));
        addExtensionJars(fs, new Path(libext), wf);
        addExtensionJars(fs, new Path(libext, type.name()), wf);
        if (StringUtils.isNotEmpty(lifecycle)) {
            addExtensionJars(fs, new Path(libext, type.name() + "/" + lifecycle), wf);
        }
    }

    private void copySharedLibs(Cluster cluster, COORDINATORAPP coordinatorapp) throws FalconException {
        try {
            String coordPath = coordinatorapp.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
            Path libPath = new Path(coordPath, "lib");
            SharedLibraryHostingService.pushLibsToHDFS(StartupProperties.get().getProperty("system.lib.location"),
                    libPath, cluster, FALCON_JAR_FILTER);
        } catch (IOException e) {
            throw new FalconException("Failed to copy shared libs on cluster " + cluster.getName(), e);
        }
    }

    public abstract List<COORDINATORAPP> getCoordinators(Cluster cluster, Path bundlePath) throws FalconException;

    protected org.apache.falcon.oozie.coordinator.CONFIGURATION getCoordConfig(Map<String, String> propMap) {
        org.apache.falcon.oozie.coordinator.CONFIGURATION conf
            = new org.apache.falcon.oozie.coordinator.CONFIGURATION();
        List<org.apache.falcon.oozie.coordinator.CONFIGURATION.Property> props = conf.getProperty();
        for (Entry<String, String> prop : propMap.entrySet()) {
            props.add(createCoordProperty(prop.getKey(), prop.getValue()));
        }
        return conf;
    }
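A quick usage note with hypothetical values (getCoordConfig() is protected, so imagine this inside a subclass): it simply mirrors a Java map into the JAXB structure that becomes the coordinator's <configuration> element.

    // Each map entry becomes one <property> in the coordinator's <configuration>.
    Map<String, String> propMap = new HashMap<String, String>();
    propMap.put(MR_QUEUE_NAME, "default");
    propMap.put(MR_JOB_PRIORITY, "NORMAL");
    org.apache.falcon.oozie.coordinator.CONFIGURATION coordConf = getCoordConfig(propMap);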
    protected Map<String, String> createCoordDefaultConfiguration(Cluster cluster, Path coordPath, String coordName) {
        Map<String, String> props = new HashMap<String, String>();
        props.put(ARG.entityName.getPropName(), entity.getName());
        props.put(ARG.nominalTime.getPropName(), NOMINAL_TIME_EL);
        props.put(ARG.timeStamp.getPropName(), ACTUAL_TIME_EL);
        props.put("userBrokerUrl", ClusterHelper.getMessageBrokerUrl(cluster));
        props.put("userBrokerImplClass", ClusterHelper.getMessageBrokerImplClass(cluster));
        String falconBrokerUrl = StartupProperties.get().getProperty(ARG.brokerUrl.getPropName(),
                "tcp://localhost:61616?daemon=true");
        props.put(ARG.brokerUrl.getPropName(), falconBrokerUrl);
        String falconBrokerImplClass = StartupProperties.get().getProperty(ARG.brokerImplClass.getPropName(),
                ClusterHelper.DEFAULT_BROKER_IMPL_CLASS);
        props.put(ARG.brokerImplClass.getPropName(), falconBrokerImplClass);
        String jmsMessageTTL = StartupProperties.get().getProperty("broker.ttlInMins",
                DEFAULT_BROKER_MSG_TTL.toString());
        props.put(ARG.brokerTTL.getPropName(), jmsMessageTTL);
        props.put(ARG.entityType.getPropName(), entity.getEntityType().name());
        props.put("logDir", getStoragePath(new Path(coordPath, "../../logs")));
        props.put(OozieClient.EXTERNAL_ID,
                new ExternalId(entity.getName(), EntityUtil.getWorkflowNameTag(coordName, entity),
                        "${coord:nominalTime()}").getId());
        props.put("workflowEngineUrl", ClusterHelper.getOozieUrl(cluster));

        try {
            if (EntityUtil.getLateProcess(entity) == null
                    || EntityUtil.getLateProcess(entity).getLateInputs() == null
                    || EntityUtil.getLateProcess(entity).getLateInputs().size() == 0) {
                props.put("shouldRecord", "false");
            } else {
                props.put("shouldRecord", "true");
            }
        } catch (FalconException e) {
            LOG.error("Unable to get Late Process for entity: {}", entity, e);
            throw new FalconRuntimException(e);
        }

        props.put("entityName", entity.getName());
        props.put("entityType", entity.getEntityType().name().toLowerCase());
        props.put(ARG.cluster.getPropName(), cluster.getName());

        if (cluster.getProperties() != null) {
            for (Property prop : cluster.getProperties().getProperties()) {
                props.put(prop.getName(), prop.getValue());
            }
        }

        props.put(MR_QUEUE_NAME, "default");
        props.put(MR_JOB_PRIORITY, "NORMAL");

        // properties defined in the entity override the properties set above
        props.putAll(getEntityProperties());
        return props;
    }

    protected org.apache.falcon.oozie.coordinator.CONFIGURATION.Property createCoordProperty(String name,
                                                                                             String value) {
        org.apache.falcon.oozie.coordinator.CONFIGURATION.Property prop
            = new org.apache.falcon.oozie.coordinator.CONFIGURATION.Property();
        prop.setName(name);
        prop.setValue(value);
        return prop;
    }

    protected void marshal(Cluster cluster, JAXBElement<?> jaxbElement, JAXBContext jaxbContext, Path outPath)
        throws FalconException {
        try {
            Marshaller marshaller = jaxbContext.createMarshaller();
            marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
            FileSystem fs = HadoopClientFactory.get().createFileSystem(
                    outPath.toUri(), ClusterHelper.getConfiguration(cluster));
            OutputStream out = fs.create(outPath);
            try {
                marshaller.marshal(jaxbElement, out);
            } finally {
                out.close();
            }

            if (LOG.isDebugEnabled()) {
                StringWriter writer = new StringWriter();
                marshaller.marshal(jaxbElement, writer);
                LOG.debug("Writing definition to {} on cluster {}", outPath, cluster.getName());
                LOG.debug(writer.getBuffer().toString());
            }

            LOG.info("Marshalled {} to {}", jaxbElement.getDeclaredType(), outPath);
        } catch (Exception e) {
            throw new FalconException("Unable to marshal app object", e);
        }
    }

    private void createLogsDir(Cluster cluster, Path coordPath) throws FalconException {
        try {
            FileSystem fs = HadoopClientFactory.get().createFileSystem(
                    coordPath.toUri(), ClusterHelper.getConfiguration(cluster));
            Path logsDir = new Path(coordPath, "../../logs");
            fs.mkdirs(logsDir);

            // logs are copied within Oozie, as the user, during post-processing and hence need 777 permissions
            FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
            fs.setPermission(logsDir, permission);
        } catch (Exception e) {
            throw new FalconException("Unable to create temp dir in " + coordPath, e);
        }
    }

    protected String marshal(Cluster cluster, COORDINATORAPP coord, Path outPath, String name) throws FalconException {
        name = (StringUtils.isEmpty(name) ? "coordinator" : name) + ".xml";
        marshal(cluster, new ObjectFactory().createCoordinatorApp(coord), OozieUtils.COORD_JAXB_CONTEXT,
                new Path(outPath, name));
        return name;
    }

    protected void marshal(Cluster cluster, BUNDLEAPP bundle, Path outPath) throws FalconException {
        marshal(cluster, new org.apache.falcon.oozie.bundle.ObjectFactory().createBundleApp(bundle),
                OozieUtils.BUNDLE_JAXB_CONTEXT, new Path(outPath, "bundle.xml"));
    }

    protected void marshal(Cluster cluster, WORKFLOWAPP workflow, Path outPath) throws FalconException {
        marshal(cluster, new org.apache.falcon.oozie.workflow.ObjectFactory().createWorkflowApp(workflow),
                OozieUtils.WORKFLOW_JAXB_CONTEXT, new Path(outPath, "workflow.xml"));
    }

    protected String getStoragePath(Path path) {
        if (path != null) {
            return getStoragePath(path.toString());
        }
        return null;
    }

    protected String getStoragePath(String path) {
        if (StringUtils.isNotEmpty(path)) {
            if (new Path(path).toUri().getScheme() == null) {
                path = "${nameNode}" + path;
            }
        }
        return path;
    }

    protected WORKFLOWAPP getWorkflowTemplate(String template) throws FalconException {
        InputStream resourceAsStream = null;
        try {
            resourceAsStream = OozieWorkflowBuilder.class.getResourceAsStream(template);
            Unmarshaller unmarshaller = OozieUtils.WORKFLOW_JAXB_CONTEXT.createUnmarshaller();
            @SuppressWarnings("unchecked")
            JAXBElement<WORKFLOWAPP> jaxbElement = (JAXBElement<WORKFLOWAPP>) unmarshaller
                    .unmarshal(resourceAsStream);
            return jaxbElement.getValue();
        } catch (JAXBException e) {
            throw new FalconException(e);
        } finally {
            IOUtils.closeQuietly(resourceAsStream);
        }
    }

    protected COORDINATORAPP getCoordinatorTemplate(String template) throws FalconException {
        InputStream resourceAsStream = null;
        try {
            resourceAsStream = OozieWorkflowBuilder.class.getResourceAsStream(template);
            Unmarshaller unmarshaller = OozieUtils.COORD_JAXB_CONTEXT.createUnmarshaller();
            @SuppressWarnings("unchecked")
            JAXBElement<COORDINATORAPP> jaxbElement = (JAXBElement<COORDINATORAPP>) unmarshaller
                    .unmarshal(resourceAsStream);
            return jaxbElement.getValue();
        } catch (JAXBException e) {
            throw new FalconException(e);
        } finally {
            IOUtils.closeQuietly(resourceAsStream);
        }
    }

    // creates the hive-site.xml configuration in the conf dir for the given cluster
    protected void createHiveConfiguration(Cluster cluster, Path workflowPath, String prefix) throws FalconException {
        Configuration hiveConf = getHiveCredentialsAsConf(cluster);

        try {
            Configuration conf = ClusterHelper.getConfiguration(cluster);
            FileSystem fs = HadoopClientFactory.get().createFileSystem(conf);

            // write the hive conf to the staging dir
            Path confPath = new Path(workflowPath + "/conf");
            persistHiveConfiguration(fs, confPath, hiveConf, prefix);
        } catch (IOException e) {
            throw new FalconException("Unable to create hive-site.xml", e);
        }
    }

    protected void persistHiveConfiguration(FileSystem fs, Path confPath, Cluster cluster, String prefix)
        throws IOException {
        Configuration hiveConf = getHiveCredentialsAsConf(cluster);
        persistHiveConfiguration(fs, confPath, hiveConf, prefix);
    }

    private void persistHiveConfiguration(FileSystem fs, Path confPath, Configuration hiveConf,
                                          String prefix) throws IOException {
        OutputStream out = null;
        try {
            out = fs.create(new Path(confPath, prefix + "hive-site.xml"));
            hiveConf.writeXml(out);
        } finally {
            IOUtils.closeQuietly(out);
        }
    }

    private Configuration getHiveCredentialsAsConf(Cluster cluster) {
        Map<String, String> hiveCredentials = getHiveCredentials(cluster);

        Configuration hiveConf = new Configuration(false);
        for (Entry<String, String> entry : hiveCredentials.entrySet()) {
            hiveConf.set(entry.getKey(), entry.getValue());
        }

        return hiveConf;
    }

    private Map<String, String> getHiveCredentials(Cluster cluster) {
        Map<String, String> hiveCredentials = new HashMap<String, String>();
        String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);
        if (metaStoreUrl == null) {
            throw new IllegalStateException("Registry interface is not defined in cluster: " + cluster.getName());
        }

        hiveCredentials.put(METASTOREURIS, metaStoreUrl);
        hiveCredentials.put("hive.metastore.execute.setugi", "true");
        hiveCredentials.put("hcatNode", metaStoreUrl.replace("thrift", "hcat"));
        hiveCredentials.put("hcat.metastore.uri", metaStoreUrl);

        if (isSecurityEnabled) {
            String principal = ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_PRINCIPAL);
            hiveCredentials.put(METASTORE_KERBEROS_PRINCIPAL, principal);
            hiveCredentials.put(METASTORE_USE_THRIFT_SASL, "true");
            hiveCredentials.put("hcat.metastore.principal", principal);
        }

        return hiveCredentials;
    }

    /**
     * This is only necessary if a table is involved and the cluster is in secure mode.
     *
     * @param cluster        cluster entity
     * @param credentialName credential name
     * @return CREDENTIAL object
     */
    protected CREDENTIAL createHCatalogCredential(Cluster cluster, String credentialName) {
        final String metaStoreUrl = ClusterHelper.getRegistryEndPoint(cluster);

        CREDENTIAL credential = new CREDENTIAL();
        credential.setName(credentialName);
        credential.setType("hcat");
        credential.getProperty().add(createProperty("hcat.metastore.uri", metaStoreUrl));
        credential.getProperty().add(createProperty("hcat.metastore.principal",
                ClusterHelper.getPropertyValue(cluster, SecurityUtil.HIVE_METASTORE_PRINCIPAL)));
        return credential;
    }

    private CREDENTIAL.Property createProperty(String name, String value) {
        CREDENTIAL.Property property = new CREDENTIAL.Property();
        property.setName(name);
        property.setValue(value);
        return property;
    }
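Two short, assumed-context sketches before the credentials get attached to workflows. The first reuses the same JAXB plumbing the marshal() helpers wrap, but writes to a string for inspection; `coord` stands in for an already-populated COORDINATORAPP, and the commented getStoragePath() calls illustrate its ${nameNode} prefixing rule with made-up paths.

    // Sketch: marshal a coordinator app to a String instead of HDFS (coord is assumed).
    Marshaller marshaller = OozieUtils.COORD_JAXB_CONTEXT.createMarshaller();
    marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
    StringWriter writer = new StringWriter();
    marshaller.marshal(new ObjectFactory().createCoordinatorApp(coord), writer);
    System.out.println(writer);                        // pretty-printed coordinator XML

    // getStoragePath() rule of thumb (paths are illustrative):
    getStoragePath("/projects/falcon/staging");        // -> "${nameNode}/projects/falcon/staging"
    getStoragePath("hdfs://nn.example.com:8020/tmp");  // -> unchanged, already carries a scheme

The second is a hypothetical call site for the hcat credential on a secure cluster; HIVE_CREDENTIAL_NAME and FALCON_ACTIONS are the constants declared at the top of the class, and addHCatalogCredentials() follows just below.

    // Hypothetical: build the credential and attach it to a workflow and its actions.
    CREDENTIAL cred = createHCatalogCredential(cluster, HIVE_CREDENTIAL_NAME);
    addHCatalogCredentials(workflowApp, cluster, HIVE_CREDENTIAL_NAME, FALCON_ACTIONS);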
    /**
     * This is only necessary if a table is involved and the cluster is in secure mode.
     *
     * @param workflowApp    workflow xml
     * @param cluster        cluster entity
     * @param credentialName credential name
     */
    protected void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster, String credentialName) {
        CREDENTIALS credentials = workflowApp.getCredentials();
        if (credentials == null) {
            credentials = new CREDENTIALS();
        }

        credentials.getCredential().add(createHCatalogCredential(cluster, credentialName));

        // add the credential to the workflow
        workflowApp.setCredentials(credentials);
    }

    /**
     * This is only necessary if a table is involved and the cluster is in secure mode.
     *
     * @param workflowApp    workflow xml
     * @param cluster        cluster entity
     * @param credentialName credential name
     * @param actions        names of the actions to attach the credential to
     */
    protected void addHCatalogCredentials(WORKFLOWAPP workflowApp, Cluster cluster, String credentialName,
                                          Set<String> actions) {
        addHCatalogCredentials(workflowApp, cluster, credentialName);

        // add the credential to each named action
        for (Object object : workflowApp.getDecisionOrForkOrJoin()) {
            if (!(object instanceof ACTION)) {
                continue;
            }

            ACTION action = (ACTION) object;
            String actionName = action.getName();
            if (actions.contains(actionName)) {
                action.setCred(credentialName);
            }
        }
    }

    protected abstract boolean shouldSetupHiveConfiguration(Cluster cluster, T entity) throws FalconException;

    protected void decorateWithOozieRetries(ACTION action) {
        Properties props = RuntimeProperties.get();
        action.setRetryMax(props.getProperty("falcon.parentworkflow.retry.max", "3"));
        action.setRetryInterval(props.getProperty("falcon.parentworkflow.retry.interval.secs", "1"));
    }

    protected Properties createAppProperties(String clusterName, Path bundlePath, String user) throws FalconException {
        Cluster cluster = EntityUtil.getEntity(EntityType.CLUSTER, clusterName);
        Properties properties = new Properties();
        if (cluster.getProperties() != null) {
            addClusterProperties(properties, cluster.getProperties().getProperties());
        }

        properties.setProperty(OozieWorkflowEngine.NAME_NODE, ClusterHelper.getStorageUrl(cluster));
        properties.setProperty(OozieWorkflowEngine.JOB_TRACKER, ClusterHelper.getMREndPoint(cluster));
        properties.setProperty(OozieClient.BUNDLE_APP_PATH,
                "${" + OozieWorkflowEngine.NAME_NODE + "}" + bundlePath.toString());
        properties.setProperty("colo.name", cluster.getColo());

        properties.setProperty(OozieClient.USER_NAME, user);
        properties.setProperty(OozieClient.USE_SYSTEM_LIBPATH, "true");
        properties.setProperty("falcon.libpath", ClusterHelper.getLocation(cluster, "working") + "/lib");

        if (shouldSetupHiveConfiguration(cluster, entity)) {
            propagateHiveCredentials(cluster, properties);
        }

        LOG.info("Cluster: {}, PROPS: {}", cluster.getName(), properties);
        return properties;
    }

    private void addClusterProperties(Properties properties, List<Property> clusterProperties) {
        for (Property prop : clusterProperties) {
            properties.setProperty(prop.getName(), prop.getValue());
        }
    }

    /**
     * Propagates the hive credentials so the coordinator can authenticate against hive
     * for data availability triggers.
     *
     * @param cluster    cluster entity
     * @param properties property object
     */
    private void propagateHiveCredentials(Cluster cluster, Properties properties) {
        Map<String, String> hiveCredentials = getHiveCredentials(cluster);
        for (Entry<String, String> entry : hiveCredentials.entrySet()) {
            properties.setProperty(entry.getKey(), entry.getValue());
        }
    }

    public abstract Date getNextStartTime(T entity, String cluster, Date now) throws FalconException;
}
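Finally, a rough end-to-end sketch of driving the builder. Since map() and createAppProperties() are instance members (the latter protected), this is written as an extra method on the hypothetical MinimalProcessBuilder from earlier; the Oozie submission mirrors what Falcon's OozieWorkflowEngine does rather than quoting it, and error handling is elided.

    // Hypothetical driver method on MinimalProcessBuilder; not part of the real class.
    public String buildAndRun(Cluster cluster, Path bundlePath, String user) throws Exception {
        if (!map(cluster, bundlePath)) {               // writes coordinator and bundle XMLs to HDFS
            return null;                               // no coordinators, nothing to schedule
        }
        Properties props = createAppProperties(cluster.getName(), bundlePath, user);
        OozieClient client = new OozieClient(ClusterHelper.getOozieUrl(cluster));
        return client.run(props);                      // submits and starts the bundle job
    }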