com.scaleunlimited.cascading.FlowMonitor.java Source code

Java tutorial

Introduction

Here is the source code for com.scaleunlimited.cascading.FlowMonitor.java

Source

/**
 * Copyright 2010 TransPac Software, Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.scaleunlimited.cascading;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.FileWriterWithEncoding;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.mapred.JobConf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import cascading.flow.Flow;
import cascading.flow.FlowListener;
import cascading.flow.FlowStep;
import cascading.flow.StepCounters;
import cascading.stats.CascadingStats.Status;
import cascading.stats.FlowStats;
import cascading.stats.FlowStepStats;

public class FlowMonitor<Config> {

    private static final Logger LOGGER = LoggerFactory.getLogger(FlowMonitor.class);

    /** Name of the HTML status file written into the monitor's output directory. */
    public static final String FILENAME = "flow-monitor.html";

    /** Default pause between status updates; handed to Thread.sleep, so in milliseconds. */
    public static final int DEFAULT_UPDATE_INTERVAL = 10000;

    /** Default cap on the number of time-entry rows kept for each step. */
    public static final int DEFAULT_ROWS_PER_STEP = 10;

    // Classpath resources holding the HTML templates for the page, one step, and one row.
    private static final String MONITOR_TOP_HTML = "/monitor-top.html";
    private static final String MONITOR_STEP_HTML = "/monitor-step.html";
    private static final String MONITOR_ROW_HTML = "/monitor-row.html";

    // Fallback output directories used when no HTML directory has been set explicitly.
    private static final String DEFAULT_HADOOP_LOG_DIR = "/mnt/hadoop/logs/";
    private static final String DEFAULT_LOCAL_LOG_DIR = "./";

    private final Flow<Config> _flow;

    // Written from the FlowListener callback and read by run(); volatile so the write is
    // visible to the polling thread if the callback fires on a different thread.
    private volatile Throwable _flowException;

    private int _updateInterval;
    private boolean _includeCascadingCounters;

    // null until set explicitly or defaulted at the start of run().
    private File _htmlDir;

    private final List<IMonitorTask> _tasks;
    private int _timeEntriesPerStep;
    private final List<StepEntry> _stepEntries;

    // Template text, loaded once in the constructor.
    private final String _htmlTopTemplate;
    private final String _htmlStepTemplate;
    private final String _htmlRowTemplate;

    /**
     * Creates a monitor for the given flow, using the default update interval and
     * rows-per-step, with Cascading counters excluded and no HTML directory set.
     * Loads the three HTML templates from the classpath and creates one
     * {@link StepEntry} per flow step.
     *
     * @param flow the flow to start and monitor
     * @throws IOException if a template resource is missing or can't be read
     */
    public FlowMonitor(Flow<Config> flow) throws IOException {
        _flow = flow;

        _updateInterval = DEFAULT_UPDATE_INTERVAL;
        _timeEntriesPerStep = DEFAULT_ROWS_PER_STEP;
        _includeCascadingCounters = false;
        _htmlDir = null;

        _tasks = new ArrayList<IMonitorTask>();

        _htmlTopTemplate = loadTemplate(MONITOR_TOP_HTML);
        _htmlStepTemplate = loadTemplate(MONITOR_STEP_HTML);
        _htmlRowTemplate = loadTemplate(MONITOR_ROW_HTML);

        _stepEntries = new ArrayList<StepEntry>();
        for (FlowStep<Config> step : _flow.getFlowSteps()) {
            _stepEntries.add(new StepEntry(step));
        }
    }

    /**
     * Reads a classpath resource fully into a string and closes the stream.
     *
     * @param resourceName absolute classpath name of the template
     * @return the template text
     * @throws IOException if the resource doesn't exist or reading fails
     */
    private static String loadTemplate(String resourceName) throws IOException {
        InputStream is = FlowMonitor.class.getResourceAsStream(resourceName);
        if (is == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear message
            // instead of an NPE from inside the stream reader.
            throw new IOException("Can't find HTML template resource: " + resourceName);
        }

        try {
            // The generated page is written as UTF-8, so decode the templates the same way
            // rather than with the platform default charset.
            // NOTE(review): assumes the bundled templates are UTF-8 (or plain ASCII) - confirm.
            return IOUtils.toString(is, "UTF-8");
        } finally {
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * @return the flow this monitor was constructed with
     */
    public Flow<Config> getFlow() {
        return this._flow;
    }

    /**
     * @return the directory the HTML file is written to; null if none has been set
     *         and run() has not yet picked a default
     */
    public File getHtmlDirectory() {
        return this._htmlDir;
    }

    /**
     * Sets the directory that the HTML status file is written into.
     *
     * @param dir path of the output directory
     */
    public void setHtmlDirectory(String dir) {
        final File directory = new File(dir);
        this._htmlDir = directory;
    }

    /**
     * @return the maximum number of time-entry rows retained for each step
     */
    public int getRowsPerStep() {
        return this._timeEntriesPerStep;
    }

    /**
     * Sets the maximum number of time-entry rows retained for each step.
     *
     * @param rowsPerStep row cap; zero keeps no rows
     * @throws IllegalArgumentException if rowsPerStep is negative
     */
    public void setRowsPerStep(int rowsPerStep) {
        // A negative cap can never be satisfied by trimming a list, and would otherwise
        // only surface as a failure in the middle of run(); reject it here instead.
        if (rowsPerStep < 0) {
            throw new IllegalArgumentException("rowsPerStep must be >= 0: " + rowsPerStep);
        }

        _timeEntriesPerStep = rowsPerStep;
    }

    /**
     * @return the pause between status updates, as passed to Thread.sleep (milliseconds)
     */
    public int getUpdateInterval() {
        return this._updateInterval;
    }

    /**
     * Sets the pause between status updates.
     *
     * @param updateInterval pause in milliseconds (the value is passed to Thread.sleep)
     * @throws IllegalArgumentException if updateInterval is negative
     */
    public void setUpdateInterval(int updateInterval) {
        // Thread.sleep rejects negative values, which would only surface inside run()
        // after the flow has already been started; reject it here instead.
        if (updateInterval < 0) {
            throw new IllegalArgumentException("updateInterval must be >= 0: " + updateInterval);
        }

        _updateInterval = updateInterval;
    }

    /**
     * Controls whether every StepCounters value is added as an extra column.
     *
     * @param includeCascadingCounters true to include the StepCounters columns
     */
    public void setIncludeCascadingCounters(boolean includeCascadingCounters) {
        this._includeCascadingCounters = includeCascadingCounters;
    }

    /**
     * @return true if every StepCounters value is added as an extra column
     */
    public boolean isIncludeCascadingCounters() {
        return this._includeCascadingCounters;
    }

    /**
     * Registers a task; each registered task supplies one header cell and one value
     * per time entry, in registration order.
     *
     * @param task the task to append
     */
    public void addMonitorTask(IMonitorTask task) {
        this._tasks.add(task);
    }

    /**
     * Starts the flow and blocks until it has finished, rewriting the HTML status file
     * once per update interval. When the flow is done, a copy of the status file named
     * "&lt;flow name&gt;-flow-monitor.html" is written next to it.
     *
     * If no HTML directory has been set, a default one is chosen first.
     *
     * @param counters counters to report as columns for every step
     * @return true if the flow's stats report it as successful
     * @throws Throwable the throwable captured from the flow's listener callback if the
     *         flow failed; also any I/O or interruption error raised while monitoring
     */
    @SuppressWarnings("rawtypes")
    public boolean run(Enum... counters) throws Throwable {
        if (_htmlDir == null) {
            _htmlDir = getDefaultLogDir(_flow.getConfig());
        }

        _flowException = null;
        FlowListener catchExceptions = new FlowListener() {

            @Override
            public void onCompleted(Flow flow) {
            }

            @Override
            public void onStarting(Flow flow) {
            }

            @Override
            public void onStopping(Flow flow) {
            }

            @Override
            public boolean onThrowable(Flow flow, Throwable t) {
                // Remember the failure so run() can rethrow it once the flow is finished.
                _flowException = t;
                return true;
            }
        };

        _flow.addListener(catchExceptions);
        _flow.start();

        // Only steps in one of these states get a duration and a new time entry.
        Set<String> loggingStatus = new HashSet<String>();
        loggingStatus.add(Status.RUNNING.name());
        loggingStatus.add(Status.SUCCESSFUL.name());
        loggingStatus.add(Status.STOPPED.name());
        loggingStatus.add(Status.FAILED.name());

        FlowStats stats;
        boolean finished;
        do {
            stats = _flow.getFlowStats();

            // Sample completion BEFORE reading the step stats: the page rendered in the
            // last iteration is then built after the flow was seen as finished, instead
            // of being left stale when the flow finishes during the sleep.
            finished = stats.isFinished();

            updateStepEntries(stats, loggingStatus, counters);
            writeHtmlFile(renderHtml(counters));

            // No point in waiting once the final page has been written.
            if (!finished) {
                Thread.sleep(_updateInterval);
            }
        } while (!finished);

        archiveHtmlFile();

        Throwable flowException = _flowException;
        if (stats.isFailed() && (flowException != null)) {
            throw flowException;
        }

        return stats.isSuccessful();
    }

    /**
     * Copies status, start time and duration from the flow's step stats into the matching
     * step entries, and appends a time entry for every step in a logged state.
     */
    @SuppressWarnings("rawtypes")
    private void updateStepEntries(FlowStats stats, Set<String> loggingStatus, Enum[] counters) {
        List<FlowStepStats> stepStats = stats.getFlowStepStats();
        for (FlowStepStats stepStat : stepStats) {
            StepEntry stepEntry = findStepById(stepStat.getID());
            Status newStatus = stepStat.getStatus();
            if (stepEntry.getStatus() != newStatus) {
                stepEntry.setStartTime(stepStat.getStartTime());
                stepEntry.setStatus(newStatus);
            }

            if (loggingStatus.contains(newStatus.name())) {
                if (stepStat.isFinished()) {
                    stepEntry.setDuration(stepStat.getDuration());
                } else if (stepStat.isRunning()) {
                    stepEntry.setDuration(System.currentTimeMillis() - stepEntry.getStartTime());
                } else {
                    // Duration isn't known
                    stepEntry.setDuration(0);
                }

                stepEntry.addTimeEntry(makeTimeEntry(stepEntry, stepStat, counters), _timeEntriesPerStep);
            }
        }
    }

    /**
     * Builds the complete HTML page from the templates and the current step entries.
     */
    @SuppressWarnings("rawtypes")
    private String renderHtml(Enum[] counters) {
        StringBuilder topTemplate = new StringBuilder(_htmlTopTemplate);
        replace(topTemplate, "%flowname%", StringEscapeUtils.escapeHtml(_flow.getName()));

        for (StepEntry stepEntry : _stepEntries) {
            StringBuilder stepTemplate = new StringBuilder(_htmlStepTemplate);
            replaceHtml(stepTemplate, "%stepname%", stepEntry.getName());
            replaceHtml(stepTemplate, "%stepstatus%", "" + stepEntry.getStatus());
            replaceHtml(stepTemplate, "%stepstart%", new Date(stepEntry.getStartTime()).toString());
            replaceHtml(stepTemplate, "%stepduration%", "" + (stepEntry.getDuration() / 1000));

            // Header cells are already escaped HTML, so they go in unescaped.
            replace(stepTemplate, "%counternames%", getTableHeader(stepEntry.getStep(), counters));

            // Now we need to build rows of data, for steps that are running or have finished.
            if (stepEntry.getStatus() != Status.PENDING) {
                for (TimeEntry row : stepEntry.getTimerEntries()) {
                    StringBuilder rowTemplate = new StringBuilder(_htmlRowTemplate);
                    replaceHtml(rowTemplate, "%timeoffset%", "" + (row.getTimeDelta() / 1000));
                    replace(rowTemplate, "%countervalues%", getCounterValues(row.getCounterValues()));

                    // Inserting in front of the marker keeps rows in list order.
                    insert(stepTemplate, "%steprows%", rowTemplate.toString());
                }
            }

            // Get rid of position marker we used during inserts.
            replace(stepTemplate, "%steprows%", "");

            insert(topTemplate, "%steps%", stepTemplate.toString());
        }

        // Get rid of position marker we used during inserts.
        replace(topTemplate, "%steps%", "");

        return topTemplate.toString();
    }

    /**
     * Writes the page to the status file as UTF-8, closing the writer even on failure.
     */
    private void writeHtmlFile(String html) throws IOException {
        File htmlFile = new File(_htmlDir, FILENAME);
        FileWriterWithEncoding fw = new FileWriterWithEncoding(htmlFile, "UTF-8");
        try {
            IOUtils.write(html, fw);

            // Close explicitly on the success path so flush errors are reported.
            fw.close();
        } finally {
            IOUtils.closeQuietly(fw);
        }
    }

    /**
     * Best-effort copy of the status file to a flow-specific archive file; failures are
     * logged as warnings and never propagated.
     */
    private void archiveHtmlFile() {
        File htmlFile = new File(_htmlDir, FILENAME);
        File archiveFile = new File(_htmlDir, String.format("%s-%s", _flow.getName(), FILENAME));
        archiveFile.delete();

        // If the old archive is still there the delete failed, so don't try to overwrite it.
        if (!htmlFile.exists() || archiveFile.exists()) {
            LOGGER.warn("Unable to create archive of file " + htmlFile.getAbsolutePath());
            return;
        }

        try {
            // Byte-for-byte copy: no charset decoding involved and no stream left open.
            // 'false' gives the archive a fresh modification time, as a newly written
            // file would have.
            FileUtils.copyFile(htmlFile, archiveFile, false);
        } catch (Exception e) {
            LOGGER.warn("Unable to create archive of file " + htmlFile.getAbsolutePath(), e);
        }
    }

    /**
     * Looks up the step entry whose step ID equals the given ID.
     *
     * @param stepId ID to search for
     * @return the first matching entry
     * @throws RuntimeException if no entry has that ID
     */
    private StepEntry findStepById(String stepId) {
        final int count = _stepEntries.size();
        for (int i = 0; i < count; i++) {
            final StepEntry candidate = _stepEntries.get(i);
            if (!candidate.getId().equals(stepId)) {
                continue;
            }

            return candidate;
        }

        throw new RuntimeException("Can't find StepEntry with id " + stepId);
    }

    /**
     * Builds one time entry for a step, stamped with the step's current duration.
     * Values are added in the same order as the header cells: the caller's counters,
     * then one value per registered task, then (if enabled) every StepCounters value.
     *
     * @param stepEntry entry supplying the step and its duration
     * @param stepStats stats the counter values are read from
     * @param counters caller-selected counters
     * @return the populated time entry
     */
    @SuppressWarnings("rawtypes")
    private TimeEntry makeTimeEntry(StepEntry stepEntry, FlowStepStats stepStats, Enum... counters) {
        final FlowStep step = stepEntry.getStep();
        final TimeEntry entry = new TimeEntry(stepEntry.getDuration());

        for (int i = 0; i < counters.length; i++) {
            entry.addCounterValue("" + StepUtils.safeGetCounter(stepStats, counters[i]));
        }

        for (IMonitorTask monitorTask : _tasks) {
            String taskValue;
            try {
                taskValue = monitorTask.getValue(_flow, step, stepStats);
            } catch (Throwable t) {
                // A misbehaving task must not take down monitoring; show a marker instead.
                LOGGER.error("Exception thrown by MonitorTask!", t);
                taskValue = "<error>";
            }

            entry.addCounterValue(taskValue);
        }

        if (!_includeCascadingCounters) {
            return entry;
        }

        for (Enum cascadingCounter : StepCounters.values()) {
            entry.addCounterValue("" + StepUtils.safeGetCounter(stepStats, cascadingCounter));
        }

        return entry;
    }

    /**
     * Builds the header cells for a step's table as HTML-escaped td elements: the
     * caller's counters, then one cell per registered task, then (if enabled) every
     * StepCounters value.
     *
     * @param flowStep step passed to each task when asking for its column name
     * @param counters caller-selected counters
     * @return the concatenated header cells
     */
    @SuppressWarnings("rawtypes")
    private String getTableHeader(FlowStep flowStep, Enum... counters) {
        final StringBuilder cells = new StringBuilder();

        for (int i = 0; i < counters.length; i++) {
            cells.append("<td>").append(StringEscapeUtils.escapeHtml(counters[i].toString())).append("</td>");
        }

        for (IMonitorTask monitorTask : _tasks) {
            cells.append("<td>").append(StringEscapeUtils.escapeHtml(monitorTask.getName(_flow, flowStep))).append("</td>");
        }

        if (!_includeCascadingCounters) {
            return cells.toString();
        }

        for (Enum cascadingCounter : StepCounters.values()) {
            cells.append("<td>").append(StringEscapeUtils.escapeHtml(cascadingCounter.toString())).append("</td>");
        }

        return cells.toString();
    }

    /**
     * Renders each value as an HTML-escaped td element.
     *
     * @param counterValues values to render, in column order
     * @return the concatenated cells
     */
    private String getCounterValues(List<String> counterValues) {
        final StringBuilder cells = new StringBuilder();

        for (int i = 0; i < counterValues.size(); i++) {
            cells.append("<td>").append(StringEscapeUtils.escapeHtml(counterValues.get(i))).append("</td>");
        }

        return cells.toString();
    }

    /**
     * Replaces the first occurrence of a key in the template with the given value,
     * inserted as-is (no escaping).
     *
     * @param template buffer to modify in place
     * @param key marker text to look for
     * @param value replacement text
     * @throws RuntimeException if the key isn't present
     */
    private void replace(StringBuilder template, String key, String value) {
        final int start = template.indexOf(key);
        if (start < 0) {
            throw new RuntimeException("Key doesn't exist in template: " + key);
        }

        // Remove the marker, then put the value where it used to begin.
        final int end = start + key.length();
        template.delete(start, end).insert(start, value);
    }

    /**
     * Replaces the first occurrence of a key in the template with the HTML-escaped value.
     *
     * @param template buffer to modify in place
     * @param key marker text to look for
     * @param value raw text, escaped before insertion
     */
    private void replaceHtml(StringBuilder template, String key, String value) {
        final String escaped = StringEscapeUtils.escapeHtml(value);
        replace(template, key, escaped);
    }

    /**
     * Inserts the value directly in front of the first occurrence of a key, leaving the
     * key in place so further inserts land after this one.
     *
     * @param template buffer to modify in place
     * @param key marker text to look for
     * @param value text to insert
     * @throws RuntimeException if the key isn't present
     */
    private void insert(StringBuilder template, String key, String value) {
        final int markerPos = template.indexOf(key);
        if (markerPos < 0) {
            throw new RuntimeException("Key doesn't exist in template: " + key);
        }

        template.insert(markerPos, value);
    }

    /**
     * Picks the directory for the HTML file when none was set explicitly.
     *
     * For a local job this is the current directory. Otherwise the first of these that
     * is set wins: system property HADOOP_LOG_DIR, system property hadoop.log.dir,
     * "logs" under system property HADOOP_HOME, and finally a hard-coded default.
     *
     * @param config the flow's configuration, used to decide whether the job is local
     * @return an existing directory
     * @throws RuntimeException if the chosen location doesn't exist or isn't a directory
     */
    private File getDefaultLogDir(Config config) {
        File result;

        if (isJobLocal(config)) {
            result = new File(DEFAULT_LOCAL_LOG_DIR);
            if (!result.exists()) {
                // A failed mkdir is caught by the existence check at the end.
                result.mkdir();
            }
        } else {
            // NOTE(review): HADOOP_LOG_DIR and HADOOP_HOME are read as JVM system
            // properties here, not as environment variables - confirm that is intended.
            String hadoopLogDir = System.getProperty("HADOOP_LOG_DIR");
            if (hadoopLogDir == null) {
                hadoopLogDir = System.getProperty("hadoop.log.dir");
            }

            if (hadoopLogDir == null) {
                String hadoopHomeDir = System.getProperty("HADOOP_HOME");
                if (hadoopHomeDir != null) {
                    // Concatenate; a chained assignment here would throw away the home
                    // directory and leave just "/logs".
                    hadoopLogDir = hadoopHomeDir + "/logs";
                }
            }

            if (hadoopLogDir == null) {
                hadoopLogDir = DEFAULT_HADOOP_LOG_DIR;
            }

            LOGGER.info("Setting monitor output directory to: " + hadoopLogDir);
            result = new File(hadoopLogDir);
        }

        if (!result.exists() || !result.isDirectory()) {
            throw new RuntimeException("Can't find default location for HTML file: " + result);
        }

        return result;
    }

    /**
     * Decides whether the flow runs locally.
     *
     * @param config the flow's configuration
     * @return true if config isn't a JobConf, or if its "mapred.job.tracker" setting is
     *         "local" (case-insensitive) or absent
     */
    private boolean isJobLocal(Config config) {
        if (config instanceof JobConf) {
            JobConf conf = (JobConf) config;

            // Use the default-valued getter so an unset property can't cause an NPE.
            // NOTE(review): an absent property is treated as "local" on the assumption that
            // this matches Hadoop's own default for the key - confirm.
            return "local".equalsIgnoreCase(conf.get("mapred.job.tracker", "local"));
        }

        return true;
    }

    /**
     * One row of a step's table: a time offset plus the counter values captured at
     * that moment, in column order.
     */
    private static class TimeEntry {
        private final long _timeDelta;
        private final List<String> _counterValues;

        /**
         * Creates an entry with no counter values yet.
         *
         * @param timeDelta time offset for this row
         */
        public TimeEntry(long timeDelta) {
            _timeDelta = timeDelta;
            _counterValues = new ArrayList<String>();
        }

        /**
         * Creates an entry pre-populated with the given values.
         *
         * @param timeDelta time offset for this row
         * @param counterValues initial values; copied, so later changes to the caller's
         *        list don't affect this entry
         */
        @SuppressWarnings("unused")
        public TimeEntry(long timeDelta, List<String> counterValues) {
            _timeDelta = timeDelta;

            // Copy instead of aliasing the caller's list, which also guarantees that
            // addCounterValue works even if the caller passed an unmodifiable list.
            _counterValues = new ArrayList<String>(counterValues);
        }

        /** @return the time offset for this row */
        public long getTimeDelta() {
            return _timeDelta;
        }

        /** @return the counter values, in the order they were added */
        public List<String> getCounterValues() {
            return _counterValues;
        }

        /**
         * Appends one value as the next column of this row.
         *
         * @param counterValue value to append
         */
        public void addCounterValue(String counterValue) {
            _counterValues.add(counterValue);
        }
    }

    /**
     * Mutable bookkeeping for one flow step: its last seen status, start time, duration,
     * and a bounded list of time entries. New entries start out as PENDING.
     */
    @SuppressWarnings("rawtypes")
    private static class StepEntry {
        private Status _status;
        private final FlowStep _step;

        // Used with new Date(...) and System.currentTimeMillis(), i.e. epoch milliseconds.
        private long _startTime;

        // Milliseconds; callers divide by 1000 for display.
        private long _duration;

        private final List<TimeEntry> _timeEntries;

        /**
         * @param step the flow step this entry tracks
         */
        public StepEntry(FlowStep step) {
            _step = step;
            _status = Status.PENDING;

            _timeEntries = new ArrayList<TimeEntry>();
        }

        /** @return the flow step this entry tracks */
        public FlowStep getStep() {
            return _step;
        }

        /** @return the most recently recorded status */
        public Status getStatus() {
            return _status;
        }

        /** @param status the status to record */
        public void setStatus(Status status) {
            _status = status;
        }

        /** @return the step's display name */
        public String getName() {
            return _step.getStepDisplayName();
        }

        /** @return the step's ID */
        public String getId() {
            return _step.getID();
        }

        /** @return the recorded start time */
        public long getStartTime() {
            return _startTime;
        }

        /** @param startTime the start time to record */
        public void setStartTime(long startTime) {
            _startTime = startTime;
        }

        /** @return the recorded duration */
        public long getDuration() {
            return _duration;
        }

        /** @param duration the duration to record */
        public void setDuration(long duration) {
            _duration = duration;
        }

        /** @return the retained time entries, oldest first */
        public List<TimeEntry> getTimerEntries() {
            return _timeEntries;
        }

        /**
         * Appends a time entry, then drops the oldest entries so that at most
         * maxTimeEntries remain.
         *
         * @param timeEntry entry to append
         * @param maxTimeEntries maximum number of entries to keep; values below zero are
         *        treated as zero
         */
        public void addTimeEntry(TimeEntry timeEntry, int maxTimeEntries) {
            _timeEntries.add(timeEntry);

            // Clamp so a negative cap empties the list instead of trying to remove from
            // an already-empty list; drop the surplus in one operation.
            int excess = _timeEntries.size() - Math.max(0, maxTimeEntries);
            if (excess > 0) {
                _timeEntries.subList(0, excess).clear();
            }
        }

    }

}