Java tutorial: the HydrographRuntime class (hydrograph.engine.cascading.integration)
/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 *******************************************************************************/
package hydrograph.engine.cascading.integration;

import cascading.cascade.Cascade;
import cascading.flow.Flow;
import cascading.property.AppProps;
import hydrograph.engine.component.mapping.ComponentAdapterFactory;
import hydrograph.engine.core.core.HydrographJob;
import hydrograph.engine.core.core.HydrographRuntimeService;
import hydrograph.engine.core.flowmanipulation.FlowManipulationContext;
import hydrograph.engine.core.flowmanipulation.FlowManipulationHandler;
import hydrograph.engine.core.helper.JAXBTraversal;
import hydrograph.engine.core.props.PropertiesLoader;
import hydrograph.engine.core.schemapropagation.SchemaFieldHandler;
import hydrograph.engine.core.utilities.CommandLineOptionsProcessor;
import hydrograph.engine.core.utilities.GeneralUtilities;
import hydrograph.engine.flow.utils.ExecutionTrackingListener;
import hydrograph.engine.hadoop.utils.HadoopConfigProvider;
import hydrograph.engine.jaxb.commontypes.TypeProperties.Property;
import hydrograph.engine.utilities.ExecutionTrackingUtilities;
import hydrograph.engine.utilities.HiveMetastoreTokenProvider;
import hydrograph.engine.utilities.UserClassLoader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Properties;

/**
 * Cascading-based implementation of {@link HydrographRuntimeService}: builds a
 * Hydrograph job into Cascading flows, runs them, and exposes execution
 * tracking status.
 */
@SuppressWarnings({ "rawtypes" })
public class HydrographRuntime implements HydrographRuntimeService {

    final String EXECUTION_TRACKING = "hydrograph.execution.tracking";
    private static final String OPTION_DOT_PATH = "dotpath";
    private Properties hadoopProperties = new Properties();
    private ExecutionTrackingListener executionTrackingListener;
    private FlowBuilder flowBuilder;
    private RuntimeContext runtimeContext;
    private String[] args;
    private PropertiesLoader config;
    private FlowManipulationContext flowManipulationContext;
    private static final Logger LOG = LoggerFactory.getLogger(HydrographRuntime.class);

    /**
     * Runs the full job lifecycle: initialize, prepare, execute, cleanup.
     */
    public void executeProcess(String[] args, HydrographJob hydrographJob) {
        this.args = args != null ? args.clone() : null;
        config = PropertiesLoader.getInstance();
        LOG.info("Invoking initialize on runtime service");
        initialize(config.getRuntimeServiceProperties(), this.args, hydrographJob, null, null);
        LOG.info("Preparation started");
        prepareToExecute();
        LOG.info("Preparation completed. Now starting execution");
        LOG.info("Execution Started");
        execute();
        LOG.info("Execution Complete");
        oncomplete();
    }

    public void initialize(Properties config, String[] args, HydrographJob hydrographJob, String jobId,
            String UDFPath) {
        AppProps.setApplicationName(hadoopProperties, hydrographJob.getJAXBObject().getName());
        hadoopProperties.putAll(config);
        Configuration conf = new HadoopConfigProvider(hadoopProperties).getJobConf();
        SchemaFieldHandler schemaFieldHandler = new SchemaFieldHandler(
                hydrographJob.getJAXBObject().getInputsOrOutputsOrStraightPulls());
        flowManipulationContext = new FlowManipulationContext(hydrographJob, args, schemaFieldHandler, jobId);
        FlowManipulationHandler flowManipulationHandler = new FlowManipulationHandler();
        hydrographJob = flowManipulationHandler.execute(flowManipulationContext);

        // Merge any runtime properties declared in the job definition into the
        // Hadoop properties used for this run.
        if (hydrographJob.getJAXBObject().getRuntimeProperties() != null
                && hydrographJob.getJAXBObject().getRuntimeProperties().getProperty() != null) {
            for (Property property : hydrographJob.getJAXBObject().getRuntimeProperties().getProperty()) {
                hadoopProperties.put(property.getName(), property.getValue());
            }
        }

        // Hive components need a metastore delegation token in secure clusters.
        JAXBTraversal traversal = new JAXBTraversal(hydrographJob.getJAXBObject());
        if (traversal.isHiveComponentPresentInFlow()) {
            try {
                HiveMetastoreTokenProvider.obtainTokenForHiveMetastore(conf);
            } catch (TException | IOException e) {
                throw new HydrographRuntimeException(e);
            }
        }

        // Let Hadoop consume its generic options; keep only the remaining args.
        String[] otherArgs;
        try {
            otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        } catch (IOException e) {
            throw new HydrographRuntimeException(e);
        }
        StringBuilder argsString = new StringBuilder();
        for (String arg : otherArgs) {
            argsString.append(" ").append(arg);
        }
        LOG.info("After processing, arguments are:" + argsString);
        this.args = otherArgs;
        // setJar(otherArgs);
        hadoopProperties.putAll(conf.getValByRegex(".*"));

        ComponentAdapterFactory componentAdapterFactory = new ComponentAdapterFactory(
                hydrographJob.getJAXBObject());
        flowBuilder = new FlowBuilder();
        runtimeContext = new RuntimeContext(hydrographJob, traversal, hadoopProperties, componentAdapterFactory,
                flowManipulationContext.getSchemaFieldHandler(), UDFPath);
        LOG.info("Graph '" + runtimeContext.getHydrographJob().getJAXBObject().getName()
                + "' initialized successfully");
    }

    @Override
    public void prepareToExecute() {
        flowBuilder.buildFlow(runtimeContext);
        if (GeneralUtilities.IsArgOptionPresent(args, OPTION_DOT_PATH)) {
            writeDotFiles();
        }
    }

    @Override
    public void execute() {
        if (GeneralUtilities.IsArgOptionPresent(args, CommandLineOptionsProcessor.OPTION_NO_EXECUTION)) {
            LOG.info(CommandLineOptionsProcessor.OPTION_NO_EXECUTION + " option is provided, so skipping execution");
            return;
        }
        // Attach the execution tracking listener, if one is configured.
        if (ExecutionTrackingUtilities.getExecutionTrackingClass(EXECUTION_TRACKING) != null) {
            executionTrackingListener = (ExecutionTrackingListener) UserClassLoader.loadAndInitClass(
                    ExecutionTrackingUtilities.getExecutionTrackingClass(EXECUTION_TRACKING), "execution tracking");
            executionTrackingListener.addListener(runtimeContext);
        }
        // Run each Cascade to completion.
        for (Cascade cascade : runtimeContext.getCascade()) {
            cascade.complete();
        }
    }

    @Override
    public void oncomplete() {
        flowBuilder.cleanup(flowManipulationContext.getTmpPath(), runtimeContext);
    }

    /**
     * Returns the statistics of components in a job.
     *
     * @see hydrograph.engine.execution.tracking.ComponentInfo
     */
    @Override
    public Object getExecutionStatus() {
        if (executionTrackingListener != null)
            return executionTrackingListener.getStatus();
        return null;
    }

    public Cascade[] getFlow() {
        return runtimeContext.getCascadingFlows();
    }

    /**
     * Writes DOT files for every flow and its steps under the path given with
     * the "dotpath" option.
     */
    private void writeDotFiles() {
        String[] paths = GeneralUtilities.getArgsOption(args, OPTION_DOT_PATH);
        if (paths == null) {
            throw new HydrographRuntimeException(
                    OPTION_DOT_PATH + " option is provided but is not followed by a path");
        }
        String basePath = paths[0];
        LOG.info("Dot files will be written under " + basePath);
        String flowDotPath = basePath + "/" + runtimeContext.getHydrographJob().getJAXBObject().getName() + "/flow";
        String flowStepDotPath = basePath + "/" + runtimeContext.getHydrographJob().getJAXBObject().getName()
                + "/flowstep";
        int batchCounter = 0;
        for (Cascade cascadingFlow : runtimeContext.getCascadingFlows()) {
            for (Flow flow : cascadingFlow.getFlows()) {
                flow.writeDOT(flowDotPath + "_" + batchCounter);
                flow.writeStepsDOT(flowStepDotPath + "_" + batchCounter);
                batchCounter++;
            }
        }
    }

    public class HydrographRuntimeException extends RuntimeException {

        private static final long serialVersionUID = -7891832980227676974L;

        public HydrographRuntimeException(String msg) {
            super(msg);
        }

        public HydrographRuntimeException(Throwable e) {
            super(e);
        }
    }

    /**
     * Method to kill the job: stops every running Cascade, then exits.
     */
    @Override
    public void kill() {
        LOG.info("Kill signal received");
        if (runtimeContext.getCascade() != null) {
            for (Cascade cascade : runtimeContext.getCascade()) {
                LOG.info("Killing Cascading jobs: " + cascade.getID());
                cascade.stop();
            }
        } else {
            LOG.info("No Cascading jobs present to kill. Exiting code.");
        }
        System.exit(0);
    }
}
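
For readers who want to drive this class directly, here is a minimal, hypothetical driver sketch. It is not Hydrograph's actual entry point: loadJobFromXml is a placeholder for whatever builder your project uses to turn a job definition into a HydrographJob, and executeProcess additionally expects the usual Hydrograph configuration on the classpath, since it resolves its runtime properties through PropertiesLoader.getInstance().

package hydrograph.engine.examples;

import hydrograph.engine.cascading.integration.HydrographRuntime;
import hydrograph.engine.core.core.HydrographJob;

public class RuntimeDriver {

    public static void main(String[] args) {
        // Assumption: some project-specific builder materializes a HydrographJob
        // from a job XML; this helper is hypothetical, not a Hydrograph API.
        HydrographJob job = loadJobFromXml("sampleJob.xml");

        HydrographRuntime runtime = new HydrographRuntime();
        // executeProcess() chains initialize(), prepareToExecute(), execute()
        // and oncomplete() in order; passing "-dotpath" also makes the runtime
        // write DOT files for each flow during preparation.
        runtime.executeProcess(new String[] { "-dotpath", "/tmp/dot" }, job);
    }

    // Hypothetical placeholder -- substitute your job-construction code
    // (e.g. JAXB unmarshalling of the job XML).
    private static HydrographJob loadJobFromXml(String path) {
        throw new UnsupportedOperationException("wire up a job loader here");
    }
}

Note that executeProcess clones the argument array before use, so the caller's array is never mutated, and that kill() calls System.exit(0), so it should only be invoked from a process dedicated to the job.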