org.apache.eagle.jpm.mr.running.storm.MRRunningJobFetchSpout.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.eagle.jpm.mr.running.storm.MRRunningJobFetchSpout.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.eagle.jpm.mr.running.storm;

import org.apache.commons.collections.CollectionUtils;
import org.apache.eagle.jpm.mr.running.MRRunningJobConfig;
import org.apache.eagle.jpm.mr.running.recover.MRRunningJobManager;
import org.apache.eagle.jpm.mr.runningentity.JobExecutionAPIEntity;
import org.apache.eagle.jpm.util.Constants;
import org.apache.eagle.jpm.util.Utils;
import org.apache.eagle.jpm.util.resourcefetch.RMResourceFetcher;
import org.apache.eagle.jpm.util.resourcefetch.ResourceFetcher;
import org.apache.eagle.jpm.util.resourcefetch.model.AppInfo;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;

/**
 * Storm spout that emits one tuple per MapReduce YARN application on every polling round.
 *
 * <p>Each emitted tuple has the fields {@code ("appId", "appInfo", "mrJobEntity")}:
 * <ul>
 *   <li>On the first successful call to {@link #nextTuple()} the spout emits the applications
 *       returned by {@link MRRunningJobManager#recover()}, together with their stored job
 *       entities.</li>
 *   <li>On later calls it emits the applications returned by the {@link ResourceFetcher} for
 *       {@code RUNNING_MR_JOB}, with a {@code null} job-entity map. Applications that were seen in
 *       the previous round but are missing from the current one are looked up through
 *       {@link MRRunningJobManager#recoverYarnApp(String)}, marked {@code FINISHED} and emitted
 *       once more together with their stored job entities.</li>
 * </ul>
 *
 * <p>{@link #nextTuple()} sleeps for {@code endpointConfig.fetchRunningJobInterval} after every
 * round, whether or not the round succeeded.
 */
public class MRRunningJobFetchSpout extends BaseRichSpout {
    private static final Logger LOG = LoggerFactory.getLogger(MRRunningJobFetchSpout.class);
    private final MRRunningJobConfig.EndpointConfig endpointConfig;
    private final MRRunningJobConfig.ZKStateConfig zkStateConfig;
    private ResourceFetcher resourceFetcher;
    private SpoutOutputCollector collector;
    /** {@code true} once the initial recovery round has completed without an exception. */
    private boolean init;
    private transient MRRunningJobManager runningJobManager;
    /** Ids of the applications observed as running in the most recent successful round. */
    private Set<String> runningYarnApps;

    /**
     * Creates the spout; collaborators that talk to external systems are created in
     * {@link #open(Map, TopologyContext, SpoutOutputCollector)}.
     *
     * @param endpointConfig supplies {@code rmUrls} for the resource fetcher and the polling interval
     * @param zkStateConfig  configuration handed to {@link MRRunningJobManager}
     */
    public MRRunningJobFetchSpout(MRRunningJobConfig.EndpointConfig endpointConfig,
            MRRunningJobConfig.ZKStateConfig zkStateConfig) {
        this.endpointConfig = endpointConfig;
        this.zkStateConfig = zkStateConfig;
        this.init = false;
        this.runningYarnApps = new HashSet<>();
    }

    /**
     * Creates the resource fetcher and the running-job manager and stores the collector.
     */
    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
        resourceFetcher = new RMResourceFetcher(endpointConfig.rmUrls);
        collector = spoutOutputCollector;
        this.runningJobManager = new MRRunningJobManager(zkStateConfig);
    }

    /**
     * Runs one polling round: recovers (first round) or fetches (later rounds) the applications,
     * emits one tuple per application, then sleeps for the configured interval.
     *
     * <p>Any exception raised during the round is logged and swallowed so that the spout keeps
     * polling; a failed recovery round is retried on the next call because {@code init} is only
     * set after recovery succeeded.
     */
    @Override
    public void nextTuple() {
        LOG.info("Start to fetch mr running jobs");
        try {
            Map<String, Map<String, JobExecutionAPIEntity>> mrApps;
            List<AppInfo> apps;
            if (!this.init) {
                mrApps = recoverRunningApps();
                apps = collectRecoveredApps(mrApps);
                LOG.info("recover {} mr yarn apps from zookeeper", apps.size());
                this.init = true;
            } else {
                mrApps = new HashMap<>();
                apps = fetchRunningAndFinishedApps(mrApps);
                LOG.info("get {} total apps(contains finished)", apps.size());
            }
            emitApps(apps, mrApps);
        } catch (Exception e) {
            // Route through the logger instead of stderr so the failure shows up with context.
            LOG.error("fail to fetch mr running jobs", e);
        } finally {
            Utils.sleep(endpointConfig.fetchRunningJobInterval);
        }
    }

    /**
     * Builds the list of applications to emit from the recovered state and remembers their ids
     * as the currently running set. Applications without any stored job are skipped.
     *
     * @param recovered {@code yarnAppId -> (jobId -> entity)} as returned by {@link #recoverRunningApps()}
     * @return one {@link AppInfo} per application that has at least one stored job
     */
    private List<AppInfo> collectRecoveredApps(Map<String, Map<String, JobExecutionAPIEntity>> recovered) {
        List<AppInfo> apps = new ArrayList<>();
        for (Map.Entry<String, Map<String, JobExecutionAPIEntity>> entry : recovered.entrySet()) {
            Map<String, JobExecutionAPIEntity> jobs = entry.getValue();
            if (!jobs.isEmpty()) {
                // Any job of the application carries the AppInfo; take the first one.
                apps.add(jobs.values().iterator().next().getAppInfo());
                this.runningYarnApps.add(entry.getKey());
            }
        }
        return apps;
    }

    /**
     * Fetches the running applications from the resource fetcher and appends the applications
     * that disappeared since the previous round, marked as {@code FINISHED}.
     *
     * @param finishedJobs output map; receives {@code yarnAppId -> (jobId -> entity)} for every
     *                     finished application whose stored jobs could be recovered
     * @return running applications followed by the recovered finished applications
     * @throws Exception if fetching or recovering fails for a reason other than a missing node
     */
    private List<AppInfo> fetchRunningAndFinishedApps(Map<String, Map<String, JobExecutionAPIEntity>> finishedJobs)
            throws Exception {
        List<AppInfo> fetched = resourceFetcher.getResource(Constants.ResourceType.RUNNING_MR_JOB);
        // Work on a copy so that appending finished apps never mutates the fetcher's own list.
        List<AppInfo> apps = new ArrayList<>(fetched);
        LOG.info("get {} apps from resource manager", apps.size());

        Set<String> runningNow = runningAppIdsAtThisTime(apps);
        for (String appId : getFinishedAppIds(runningNow, this.runningYarnApps)) {
            try {
                Map<String, JobExecutionAPIEntity> jobs = this.runningJobManager.recoverYarnApp(appId);
                if (!jobs.isEmpty()) {
                    finishedJobs.put(appId, jobs);
                    AppInfo appInfo = jobs.values().iterator().next().getAppInfo();
                    appInfo.setState(Constants.AppState.FINISHED.toString());
                    apps.add(appInfo);
                }
            } catch (KeeperException.NoNodeException e) {
                // Trailing throwable is logged as the exception by SLF4J; appId fills the placeholder.
                LOG.warn("yarn app {} has finished", appId, e);
            }
        }
        // Only replace the remembered set once the whole round went through.
        this.runningYarnApps = runningNow;
        return apps;
    }

    /**
     * Emits one {@code (appId, appInfo, mrJobEntity)} tuple per application; the third value is
     * the application's job-entity map when {@code mrApps} has one, otherwise {@code null}.
     */
    private void emitApps(List<AppInfo> apps, Map<String, Map<String, JobExecutionAPIEntity>> mrApps) {
        for (AppInfo appInfo : apps) {
            String appId = appInfo.getId();
            LOG.info("emit mr yarn application {}", appId);
            //emit (AppInfo, Map<String, JobExecutionAPIEntity>)
            collector.emit(new Values(appId, appInfo, mrApps.get(appId)));
        }
    }

    /**
     * Returns the ids that were running in the previous round but are absent from the current one.
     */
    private Set<String> getFinishedAppIds(Set<String> runningAppIdsAtThisTime,
            Set<String> runningAppIdsAtPreviousTime) {
        Set<String> finishedAppIds = new HashSet<>(runningAppIdsAtPreviousTime);
        finishedAppIds.removeAll(runningAppIdsAtThisTime);
        return finishedAppIds;
    }

    /**
     * Collects the ids of the given applications into a set.
     */
    private Set<String> runningAppIdsAtThisTime(List<AppInfo> apps) {
        Set<String> running = new HashSet<>();
        for (AppInfo appInfo : apps) {
            running.add(appInfo.getId());
        }
        return running;
    }

    /**
     * Recovers the stored running applications through the running-job manager.
     *
     * @return {@code yarnAppId -> (jobId -> JobExecutionAPIEntity)}
     */
    private Map<String, Map<String, JobExecutionAPIEntity>> recoverRunningApps() {
        //we need read from zookeeper, path looks like /apps/mr/running/yarnAppId/jobId/
        //content of path /apps/mr/running/yarnAppId/jobId is JobExecutionAPIEntity
        //as we know, a yarn application may contains many mr jobs
        //so, the returned results is a Map-Map
        //<yarnAppId, <jobId, JobExecutionAPIEntity>>
        return this.runningJobManager.recover();
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("appId", "appInfo", "mrJobEntity"));
    }

    /** No-op: tuples are emitted without message ids, so there is nothing to replay. */
    @Override
    public void fail(Object msgId) {

    }

    /** No-op: tuples are emitted without message ids, so there is nothing to acknowledge. */
    @Override
    public void ack(Object msgId) {

    }

    /** No-op. */
    @Override
    public void close() {
    }
}