org.apache.oozie.service.WorkflowAppService.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.oozie.service.WorkflowAppService.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.oozie.service;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapred.JobConf;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.workflow.WorkflowApp;
import org.apache.oozie.workflow.WorkflowException;
import org.apache.oozie.util.IOUtils;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.ErrorCode;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Service that reads workflow application definitions from the application path and creates the proto action
 * configuration for a job.
 */
public abstract class WorkflowAppService implements Service {

    public static final String CONF_PREFIX = Service.CONF_PREFIX + "WorkflowAppService.";

    public static final String SYSTEM_LIB_PATH = CONF_PREFIX + "system.libpath";

    public static final String APP_LIB_PATH_LIST = "oozie.wf.application.lib";

    public static final String HADOOP_USER = "user.name";

    public static final String CONFG_MAX_WF_LENGTH = CONF_PREFIX + "WorkflowDefinitionMaxLength";

    public static final String OOZIE_SUBWORKFLOW_CLASSPATH_INHERITANCE = "oozie.subworkflow.classpath.inheritance";

    public static final String OOZIE_WF_SUBWORKFLOW_CLASSPATH_INHERITANCE = "oozie.wf.subworkflow.classpath.inheritance";

    /** Stateless accept-everything filter, shared by every {@link #getLibFiles(FileSystem, Path)} call. */
    private static final PathFilter NO_PATH_FILTER = new NoPathFilter();

    private Path systemLibPath;
    private long maxWFLength;
    private boolean oozieSubWfCPInheritance;

    /**
     * Initialize the workflow application service.
     * <p>
     * Reads the system libpath, the maximum workflow definition length and the server-wide default for
     * sub-workflow classpath inheritance from the services configuration.
     *
     * @param services services instance.
     */
    public void init(Services services) {
        Configuration conf = services.getConf();

        String path = ConfigurationService.get(conf, SYSTEM_LIB_PATH);
        // Tolerate a null value (whether ConfigurationService.get can return one is not visible here)
        // instead of failing with a NullPointerException; a blank value leaves systemLibPath unset.
        String trimmedPath = (path == null) ? "" : path.trim();
        if (trimmedPath.length() > 0) {
            systemLibPath = new Path(trimmedPath);
        }

        maxWFLength = conf.getInt(CONFG_MAX_WF_LENGTH, 100000);

        oozieSubWfCPInheritance = conf.getBoolean(OOZIE_SUBWORKFLOW_CLASSPATH_INHERITANCE, false);
    }

    /**
     * Destroy the workflow application service.
     */
    public void destroy() {
    }

    /**
     * Return the public interface for workflow application service.
     *
     * @return {@link WorkflowAppService}.
     */
    public Class<? extends Service> getInterface() {
        return WorkflowAppService.class;
    }

    /**
     * Read workflow definition.
     * <p>
     * If the application path is not a file, the definition is read from <code>workflow.xml</code> under that path.
     * The definition is rejected when the file length exceeds the configured maximum
     * ({@link #CONFG_MAX_WF_LENGTH}).
     *
     * @param appPath application path.
     * @param user user name.
     * @param conf configuration; not used by this implementation.
     * @return workflow definition.
     * @throws WorkflowException thrown if the definition could not be read.
     */
    protected String readDefinition(String appPath, String user, Configuration conf) throws WorkflowException {
        try {
            URI uri = new URI(appPath);
            HadoopAccessorService has = Services.get().get(HadoopAccessorService.class);
            JobConf jobConf = has.createJobConf(uri.getAuthority());
            FileSystem fs = has.createFileSystem(user, uri, jobConf);

            // app path could be a directory
            Path path = new Path(uri.getPath());
            if (!fs.isFile(path)) {
                path = new Path(path, "workflow.xml");
            }

            FileStatus fsStatus = fs.getFileStatus(path);
            if (fsStatus.getLen() > this.maxWFLength) {
                throw new WorkflowException(ErrorCode.E0736, fsStatus.getLen(), this.maxWFLength);
            }

            // NOTE(review): no charset is given, so the platform default is used to decode the definition
            // -- confirm this is intended.
            Reader reader = new InputStreamReader(fs.open(path));
            try {
                StringWriter writer = new StringWriter();
                IOUtils.copyCharStream(reader, writer);
                return writer.toString();
            } finally {
                // Release the underlying file system stream even when the copy fails part-way.
                reader.close();
            }

        } catch (WorkflowException wfe) {
            throw wfe;
        } catch (IOException ex) {
            throw new WorkflowException(ErrorCode.E0710, ex.getMessage(), ex);
        } catch (URISyntaxException ex) {
            throw new WorkflowException(ErrorCode.E0711, appPath, ex.getMessage(), ex);
        } catch (HadoopAccessorException ex) {
            throw new WorkflowException(ex);
        } catch (Exception ex) {
            throw new WorkflowException(ErrorCode.E0710, ex.getMessage(), ex);
        }
    }

    /**
     * Create proto configuration.
     * <p>
     * The proto configuration carries the user name, the list of library file paths
     * ({@link #APP_LIB_PATH_LIST}) and every job property whose name starts with <code>oozie.</code> that is
     * not already set in the proto configuration.
     * <p>
     * The library list is built from the application's <code>lib</code> directory (workflow jobs only), the
     * entries of {@link OozieClient#LIBPATH} and, when sub-workflow classpath inheritance is enabled, the
     * parent libraries already present in {@link #APP_LIB_PATH_LIST} whose file name is not provided by this
     * application.
     *
     * @param jobConf job configuration.
     * @param isWorkflowJob indicates if the job is a workflow job or not.
     * @return proto configuration.
     * @throws WorkflowException thrown if the proto action configuration could not be created.
     */
    public XConfiguration createProtoActionConf(Configuration jobConf, boolean isWorkflowJob)
            throws WorkflowException {
        try {
            HadoopAccessorService has = Services.get().get(HadoopAccessorService.class);
            URI uri = new URI(jobConf.get(OozieClient.APP_PATH));

            Configuration conf = has.createJobConf(uri.getAuthority());
            XConfiguration protoConf = new XConfiguration();

            String user = jobConf.get(OozieClient.USER_NAME);
            conf.set(OozieClient.USER_NAME, user);
            protoConf.set(OozieClient.USER_NAME, user);

            FileSystem fs = has.createFileSystem(user, uri, conf);

            Path appPath = new Path(uri);
            XLog log = XLog.getLog(getClass());
            log.debug("jobConf.libPath = " + jobConf.get(OozieClient.LIBPATH));
            log.debug("jobConf.appPath = " + appPath);

            Collection<String> filePaths;
            if (isWorkflowJob) {
                // app path could be a directory
                Path path = new Path(uri.getPath());
                if (!fs.isFile(path)) {
                    filePaths = getLibFiles(fs, new Path(appPath + "/lib"));
                } else {
                    // app path names the definition file itself; its lib directory is a sibling of that file
                    filePaths = getLibFiles(fs, new Path(appPath.getParent(), "lib"));
                }
            } else {
                filePaths = new LinkedHashSet<String>();
            }

            String[] libPaths = jobConf.getStrings(OozieClient.LIBPATH);
            if (libPaths != null) {
                for (String libPath : libPaths) {
                    String trimmedLibPath = libPath.trim();
                    if (trimmedLibPath.length() > 0) {
                        filePaths.addAll(getLibFiles(fs, new Path(trimmedLibPath)));
                    }
                }
            }

            // Check if a subworkflow should inherit the libs from the parent WF
            // OOZIE_WF_SUBWORKFLOW_CLASSPATH_INHERITANCE has priority over OOZIE_SUBWORKFLOW_CLASSPATH_INHERITANCE from oozie-site
            // If OOZIE_WF_SUBWORKFLOW_CLASSPATH_INHERITANCE isn't specified, we use OOZIE_SUBWORKFLOW_CLASSPATH_INHERITANCE
            if (jobConf.getBoolean(OOZIE_WF_SUBWORKFLOW_CLASSPATH_INHERITANCE, oozieSubWfCPInheritance)) {
                // Keep any libs from a parent workflow that might already be in APP_LIB_PATH_LIST and also remove duplicates
                String[] parentFilePaths = jobConf.getStrings(APP_LIB_PATH_LIST);
                if (parentFilePaths != null && parentFilePaths.length > 0) {
                    filePaths.addAll(selectInheritedParentLibs(filePaths, parentFilePaths));
                }
            }

            protoConf.setStrings(APP_LIB_PATH_LIST, filePaths.toArray(new String[filePaths.size()]));

            //Add all properties start with 'oozie.'
            for (Map.Entry<String, String> entry : jobConf) {
                String name = entry.getKey();
                // if property already exists, should not overwrite
                if (name.startsWith("oozie.") && protoConf.get(name) == null) {
                    protoConf.set(name, entry.getValue());
                }
            }
            return protoConf;
        } catch (IOException ex) {
            throw new WorkflowException(ErrorCode.E0712, jobConf.get(OozieClient.APP_PATH), ex.getMessage(), ex);
        } catch (URISyntaxException ex) {
            throw new WorkflowException(ErrorCode.E0711, jobConf.get(OozieClient.APP_PATH), ex.getMessage(), ex);
        } catch (HadoopAccessorException ex) {
            throw new WorkflowException(ex);
        } catch (Exception ex) {
            throw new WorkflowException(ErrorCode.E0712, jobConf.get(OozieClient.APP_PATH), ex.getMessage(), ex);
        }
    }

    /**
     * Select the parent workflow library paths whose file name is not already provided by this application.
     * <p>
     * Only the last path component is compared, so a parent library is dropped when the application supplies
     * a file with the same name from any location.
     *
     * @param filePaths library paths collected for this application.
     * @param parentFilePaths library paths inherited from the parent workflow.
     * @return parent paths to add, in their original order.
     */
    private static List<String> selectInheritedParentLibs(Collection<String> filePaths, String[] parentFilePaths) {
        Set<String> ownFileNames = new LinkedHashSet<String>();
        for (String filePath : filePaths) {
            ownFileNames.add(new Path(filePath).getName());
        }
        List<String> inherited = new ArrayList<String>();
        for (String parentFilePath : parentFilePaths) {
            if (!ownFileNames.contains(new Path(parentFilePath).getName())) {
                inherited.add(parentFilePath);
            }
        }
        return inherited;
    }

    /**
     * Parse workflow definition.
     *
     * @param jobConf job configuration.
     * @return workflow application.
     * @throws WorkflowException thrown if the workflow application could not be parsed.
     */
    public abstract WorkflowApp parseDef(Configuration jobConf) throws WorkflowException;

    /**
     * Parse workflow definition along with config-default.xml config
     *
     * @param jobConf job configuration
     * @param configDefault config from config-default.xml
     * @return workflow application.
     * @throws WorkflowException thrown if the workflow application could not be parsed.
     */
    public abstract WorkflowApp parseDef(Configuration jobConf, Configuration configDefault)
            throws WorkflowException;

    /**
     * Parse workflow definition.
     * @param wfXml workflow.
     * @param jobConf job configuration
     * @return workflow application.
     * @throws WorkflowException thrown if the workflow application could not be parsed.
     */
    public abstract WorkflowApp parseDef(String wfXml, Configuration jobConf) throws WorkflowException;

    /**
     * Get all library paths.
     * <p>
     * Lists the entries directly under <code>libPath</code>; a missing path is logged as a warning and yields
     * an empty collection.
     *
     * @param fs file system object.
     * @param libPath hdfs library path.
     * @return URI strings of the listed entries, in listing order and without duplicates.
     * @throws IOException thrown if the lib paths could not be obtained.
     */
    private Collection<String> getLibFiles(FileSystem fs, Path libPath) throws IOException {
        Set<String> libPaths = new LinkedHashSet<String>();
        if (fs.exists(libPath)) {
            FileStatus[] files = fs.listStatus(libPath, NO_PATH_FILTER);

            for (FileStatus file : files) {
                libPaths.add(file.getPath().toUri().toString());
            }
        } else {
            XLog.getLog(getClass()).warn("libpath [{0}] does not exist", libPath);
        }
        return libPaths;
    }

    /*
     * Filter class doing no filtering.
     * It should not be necessary to define this class, but fs.listStatus() seems not to work properly without
     * a filter, so this dummy accept-everything filter is provided.
     * Declared static because it keeps no state and needs no reference to the enclosing service.
     */
    private static final class NoPathFilter implements PathFilter {
        @Override
        public boolean accept(Path path) {
            return true;
        }
    }

    /**
     * Returns Oozie system libpath.
     *
     * @return Oozie system libpath (sharelib) in HDFS if configured, otherwise <code>null</code>.
     */
    public Path getSystemLibPath() {
        return systemLibPath;
    }
}