com.twitter.ambrose.cascading.AmbroseCascadingNotifier.java Source code

Introduction

Here is the source code for com.twitter.ambrose.cascading.AmbroseCascadingNotifier.java
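
Before the listing itself, a minimal sketch of how the listener is wired up. The class implements both FlowListener and FlowStepListener, so it is registered on a Flow twice, once per interface. InMemoryStatsService stands in here for any StatsWriteService implementation and is an assumption; see EmbeddedAmbroseCascadingNotifier for a subclass that also starts an embedded Ambrose web server.

import cascading.flow.Flow;

import com.twitter.ambrose.cascading.AmbroseCascadingNotifier;
import com.twitter.ambrose.service.impl.InMemoryStatsService; // assumed implementation

public class NotifierUsageSketch {
    public static void runWithAmbrose(Flow flow) {
        // push stats to an in-memory service; any StatsWriteService works here
        AmbroseCascadingNotifier notifier =
                new AmbroseCascadingNotifier(new InMemoryStatsService());
        flow.addListener(notifier);     // flow-level events: onStarting, onCompleted, ...
        flow.addStepListener(notifier); // step-level events: onStepStarting, onStepRunning, ...
        flow.complete();                // block until the flow finishes
    }
}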

Source

/*
Copyright ......
    
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    
http://www.apache.org/licenses/LICENSE-2.0
    
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.twitter.ambrose.cascading;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobClient;
import org.jgrapht.graph.SimpleDirectedGraph;

import cascading.flow.Flow;
import cascading.flow.FlowListener;
import cascading.flow.FlowStep;
import cascading.flow.FlowStepListener;
import cascading.flow.Flows;
import cascading.flow.hadoop.HadoopFlowStep;
import cascading.flow.planner.BaseFlowStep;
import cascading.stats.hadoop.HadoopStepStats;

import com.google.common.collect.Maps;
import com.twitter.ambrose.model.DAGNode;
import com.twitter.ambrose.model.Event;
import com.twitter.ambrose.model.Job;
import com.twitter.ambrose.model.hadoop.MapReduceHelper;
import com.twitter.ambrose.service.StatsWriteService;
import com.twitter.ambrose.util.AmbroseUtils;

/**
 * CascadingNotifier that collects plan and job information from within a Cascading
 * runtime, builds Ambrose model objects, and passes the objects to an Ambrose
 * StatsWriteService object. This listener can be used regardless of what mode
 * Ambrose is running in.
 *
 * @see EmbeddedAmbroseCascadingNotifier for a subclass that can be used to run
 * an embedded Ambrose web server from a main() method.
 * @author Ahmed Mohsen
 */
public class AmbroseCascadingNotifier implements FlowListener, FlowStepListener {

    protected Log log = LogFactory.getLog(getClass());
    private StatsWriteService statsWriteService;
    private List<Job> jobs = new ArrayList<Job>();
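    // the same DAG nodes indexed two ways: by job name (known at planning time)
    // and by assigned Hadoop job id (known once the job is submitted)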
    private Map<String, DAGNode<CascadingJob>> dagNodeNameMap = Maps.newTreeMap();
    private Map<String, DAGNode<CascadingJob>> dagNodeJobIdMap = Maps.newTreeMap();
    private HashSet<String> completedJobIds = new HashSet<String>();
    private int totalNumberOfJobs;
    private int runningJobs = 0;
    private String currentFlowId;

    private MapReduceHelper mapReduceHelper = new MapReduceHelper();

    /**
     * Initializes this class with an instance of StatsWriteService to push
     * stats to.
     *
     * @param statsWriteService the service that receives DAG and job events
     */
    public AmbroseCascadingNotifier(StatsWriteService statsWriteService) {
        this.statsWriteService = statsWriteService;
    }

    protected StatsWriteService getStatsWriteService() {
        return statsWriteService;
    }

    /**
     * The onStarting event is fired when a Flow instance receives the start()
     * message. A Flow is broken down into executable units called flow steps,
     * and each flow step wraps the MapReduce job that will be submitted to
     * Hadoop. The Ambrose DAG is constructed from the step graph found in the
     * flow object.
     *
     * @param flow the flow being started
     */
    @Override
    public void onStarting(Flow flow) {
        // initialize flow state: each flow step corresponds to one MapReduce job
        List<BaseFlowStep> steps = flow.getFlowSteps();
        totalNumberOfJobs = steps.size();
        currentFlowId = flow.getID();

        Properties props = new Properties();
        props.putAll(flow.getConfigAsProperties());
        try {
            statsWriteService.initWriteService(props);
        } catch (IOException ioe) {
            throw new RuntimeException("Exception while initializing statsWriteService", ioe);
        }

        // convert the step graph generated by Cascading into a DAGNode graph to send to Ambrose
        AmbroseCascadingGraphConverter converter = new AmbroseCascadingGraphConverter(
                (SimpleDirectedGraph) Flows.getStepGraphFrom(flow), dagNodeNameMap);
        converter.convert();
        AmbroseUtils.sendDagNodeNameMap(statsWriteService, currentFlowId, this.dagNodeNameMap);
    }

    /**
     * The onStopping event is fired when a Flow instance receives the stop()
     * message.
     *
     * @param flow the flow being stopped
     */
    @Override
    public void onStopping(Flow flow) {
    }

    /**
     * The onCompleted event is fired when a Flow instance has completed all
     * work, whether it succeeded or failed. If there was a thrown exception,
     * onThrowable will be fired before this event.
     *
     * @param flow the flow that completed
     */
    @Override
    public void onCompleted(Flow flow) {
    }

    /**
     * The onThrowable event is fired if any child
     * {@link cascading.flow.FlowStep} throws a Throwable type. This throwable
     * is passed as an argument to the event. This event method should return
     * true if the given throwable was handled and should not be rethrown from
     * the {@link Flow#complete()} method.
     *
     * @param flow
     * @param throwable
     * @return true if this listener has handled the given throwable
     */
    @Override
    public boolean onThrowable(Flow flow, Throwable throwable) {
        return false;
    }

    /**
     * The onStepStarting event is fired whenever a job is submitted to Hadoop
     * and begins its execution.
     *
     * @param flowStep the step in the flow that represents the MapReduce job
     */
    @Override
    public void onStepStarting(FlowStep flowStep) {
        // get the assigned job id and Hadoop job client from the step's stats
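        // (the step is a HadoopFlowStep when running on the Hadoop platform, hence the cast)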
        HadoopStepStats stats = (HadoopStepStats) ((HadoopFlowStep) flowStep).getFlowStepStats();
        String assignedJobId = stats.getJobID();
        String jobName = flowStep.getName();
        JobClient jc = stats.getJobClient();

        runningJobs++; //update overall progress

        DAGNode<CascadingJob> node = this.dagNodeNameMap.get(jobName);
        if (node == null) {
            log.warn("jobStartedNotification - unrecognized operator name found (" + jobName + ") for jobId "
                    + assignedJobId);
        } else {
            CascadingJob job = node.getJob();
            job.setId(assignedJobId);
            job.setJobStats(stats);
            mapReduceHelper.addMapReduceJobState(job, jc);

            dagNodeJobIdMap.put(job.getId(), node);
            AmbroseUtils.pushEvent(statsWriteService, currentFlowId, new Event.JobStartedEvent(node));
        }
    }

    /**
     * The onStepCompleted event is fired when a flow step completes its work.
     *
     * @param flowStep the step in the flow that represents the MapReduce job
     */
    @Override
    public void onStepCompleted(FlowStep flowStep) {
        HadoopStepStats stats = (HadoopStepStats) flowStep.getFlowStepStats();
        String jobId = stats.getJobID();

        //get job node
        DAGNode<CascadingJob> node = dagNodeJobIdMap.get(jobId);
        if (node == null) {
            log.warn("Unrecognized jobId reported for succeeded job: " + stats.getJobID());
            return;
        }
        addCompletedJobStats(node.getJob(), stats);
        AmbroseUtils.pushEvent(statsWriteService, currentFlowId, new Event.JobFinishedEvent(node));
    }

    /**
     * The onStepThrowable event is fired if a job fails during execution. A
     * job failed event is pushed with the node that represents the failed job.
     *
     * @param flowStep the step in the flow that represents the MapReduce job
     * @param throwable the exception that caused the job to fail
     * @return true if this listener has handled the given throwable
     */
    @Override
    public boolean onStepThrowable(FlowStep flowStep, Throwable throwable) {
        HadoopStepStats stats = (HadoopStepStats) flowStep.getFlowStepStats();
        String jobName = flowStep.getName();

        //get job node
        DAGNode<CascadingJob> node = dagNodeNameMap.get(jobName);
        if (node == null) {
            log.warn("Unrecognized jobId reported for succeeded job: " + stats.getJobID());
            return false;
        }
        addCompletedJobStats(node.getJob(), stats);
        AmbroseUtils.pushEvent(statsWriteService, currentFlowId, new Event.JobFailedEvent(node));
        return false;
    }

    /**
     * The onStepRunning event is fired whenever a running job reports progress.
     *
     * @param flowStep the step in the flow that represents the MapReduce job
     */
    @Override
    public void onStepRunning(FlowStep flowStep) {
        // get the Hadoop job client from the step's stats
        HadoopStepStats stats = (HadoopStepStats) flowStep.getFlowStepStats();
        JobClient jc = stats.getJobClient();

        // first report the overall workflow progress before any per-job events
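        // runningJobs counts the flow steps that have started so far, so this
        // approximates progress as the percentage of MapReduce jobs launched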
        int progress = (int) (((runningJobs * 1.0) / totalNumberOfJobs) * 100);
        AmbroseUtils.pushWorkflowProgressEvent(statsWriteService, currentFlowId, progress);

        //get job node
        String jobId = stats.getJobID();
        DAGNode<CascadingJob> node = dagNodeJobIdMap.get(jobId);
        if (node == null) {
            log.warn("Unrecognized jobId reported for succeeded job: " + stats.getJobID());
            return;
        }

        //only push job progress events for a completed job once
        if (completedJobIds.contains(node.getJob().getId())) {
            return;
        }

        mapReduceHelper.addMapReduceJobState(node.getJob(), jc);

        if (node.getJob().getMapReduceJobState() != null) {
            AmbroseUtils.pushEvent(statsWriteService, currentFlowId, new Event.JobProgressEvent(node));

            if (node.getJob().getMapReduceJobState().isComplete()) {
                completedJobIds.add(node.getJob().getId());
            }
        }
    }

    @Override
    public void onStepStopping(FlowStep flowStep) {
    }

    private void addCompletedJobStats(CascadingJob job, HadoopStepStats stats) {
        job.setJobStats(stats);
        jobs.add(job);
    }
}