org.apache.falcon.logging.JobLogMover.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.falcon.logging.JobLogMover.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.logging;

import org.apache.commons.lang.StringUtils;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.process.EngineType;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.security.CurrentUser;
import org.apache.falcon.workflow.WorkflowExecutionContext;
import org.apache.falcon.workflow.util.OozieActionConfigurationHelper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.OozieClientException;
import org.apache.oozie.client.WorkflowAction;
import org.apache.oozie.client.WorkflowJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Utility called in the post process of oozie workflow to move oozie action executor log.
 *
 * <p>For a given workflow run it copies the Oozie job log, plus the task logs of selected
 * workflow actions, into a per-run directory
 * ({@code <logDir>/<nominalTime>/<runId, zero padded to 3 digits>}) on the file system that
 * hosts the log directory. The mover is best-effort: failures are logged and never propagated
 * to the caller.
 */
public class JobLogMover {

    private static final Logger LOG = LoggerFactory.getLogger(JobLogMover.class);
    private static final String YARN = "yarn";
    private static final String MAPREDUCE_FRAMEWORK = "mapreduce.framework.name";

    /** Charset used when serialising the oozie log text, so the output does not depend on the JVM default. */
    private static final String LOG_ENCODING = "UTF-8";

    /** Buffer size, in bytes, used when streaming logs to the target file system. */
    private static final int COPY_BUFFER_SIZE = 4096;

    /** Names of the workflow actions whose task logs are copied for feed / non-engine workflows. */
    private static final Set<String> FALCON_ACTIONS = new HashSet<String>(
            Arrays.asList("eviction", "replication"));

    /** Action types of a user workflow whose task logs are copied. */
    private static final Set<String> SUPPORTED_ACTION_TYPES = new HashSet<String>(
            Arrays.asList("pig", "hive", "java", "map-reduce"));

    /**
     * Builds the configuration used for file system access and log-retriever selection.
     *
     * @return the Oozie action configuration, or a default {@link Configuration} when it
     *         cannot be created or is {@code null}
     */
    private Configuration getConf() {
        try {
            Configuration conf = OozieActionConfigurationHelper.createActionConf();
            if (conf != null) {
                return conf;
            }
        } catch (IOException ioe) {
            // Keep the cause in the log so a broken action configuration can be diagnosed.
            LOG.warn("Cannot get Oozie configuration.  Returning default", ioe);
        }
        return new Configuration();
    }

    /**
     * Moves the logs for the given workflow run unless Hadoop security is enabled.
     * Any exception is logged and swallowed.
     *
     * @param context execution context of the workflow whose logs should be moved
     */
    public void moveLog(WorkflowExecutionContext context) {
        if (UserGroupInformation.isSecurityEnabled()) {
            LOG.info("Unable to move logs as security is enabled.");
            return;
        }
        try {
            run(context);
        } catch (Exception ignored) {
            // Mask exception, a failed log mover will not fail the user workflow
            LOG.error("Exception in job log mover:", ignored);
        }
    }

    /**
     * Copies the oozie log and the relevant action task logs of the workflow described by
     * {@code context} into its per-run log directory.
     *
     * @param context execution context of the workflow whose logs should be copied
     * @return always {@code 0}; failures are logged, never reported through the return value
     */
    public int run(WorkflowExecutionContext context) {
        try {
            String engineUrl = context.getWorkflowEngineUrl();
            if (StringUtils.isBlank(engineUrl)) {
                LOG.warn("Unable to retrieve workflow url for {} with status {} ", context.getWorkflowId(),
                        context.getWorkflowStatus());
                return 0;
            }

            // Run as the workflow owner when known, otherwise as the user running this JVM.
            String instanceOwner = context.getWorkflowUser();
            if (StringUtils.isNotBlank(instanceOwner)) {
                CurrentUser.authenticate(instanceOwner);
            } else {
                CurrentUser.authenticate(System.getProperty("user.name"));
            }

            OozieClient client = new OozieClient(engineUrl);
            WorkflowJob jobInfo;
            try {
                jobInfo = client.getJobInfo(context.getWorkflowId());
            } catch (OozieClientException e) {
                // Report the id that was actually looked up.
                LOG.error("Error getting jobinfo for: {}", context.getWorkflowId(), e);
                return 0;
            }

            //Assumption is - Each wf run will have a directory
            //the corresponding job logs are stored within the respective dir
            Path path = new Path(context.getLogDir() + "/" + context.getNominalTime() + "/"
                    + String.format("%03d", context.getWorkflowRunId()));
            FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(path.toUri(), getConf());

            if (EntityType.FEED.name().equalsIgnoreCase(context.getEntityType())
                    || notUserWorkflowEngineIsOozie(context.getUserWorkflowEngine())) {
                // if replication wf, retention wf or PIG Process
                copyFalconWorkflowLogs(client, fs, path, jobInfo);
            } else {
                copyUserWorkflowLogs(context, client, fs, path, jobInfo);
            }
        } catch (Exception e) {
            // JobLogMover doesn't throw exception, a failed log mover will not fail the user workflow
            LOG.error("Exception in log mover:", e);
        }
        return 0;
    }

    /**
     * Copies the oozie log of {@code jobInfo} and the task logs of the first action whose
     * name is listed in {@link #FALCON_ACTIONS}.
     */
    private void copyFalconWorkflowLogs(OozieClient client, FileSystem fs, Path path, WorkflowJob jobInfo)
        throws Exception {
        copyOozieLog(client, fs, path, jobInfo.getId());

        for (WorkflowAction action : jobInfo.getActions()) {
            if (FALCON_ACTIONS.contains(action.getName())) {
                copyTTlogs(fs, path, action);
                break; // only the first matching action is copied
            }
        }
    }

    /**
     * Copies the oozie log of the user workflow and the task logs of each of its actions
     * whose type is supported.
     */
    private void copyUserWorkflowLogs(WorkflowExecutionContext context, OozieClient client, FileSystem fs,
                                      Path path, WorkflowJob jobInfo) throws Exception {
        // Constant-first comparison: a null engine must not abort the move with an NPE.
        String userEngine = context.getUserWorkflowEngine();
        String flowId;
        if ("pig".equals(userEngine) || "hive".equals(userEngine)) {
            // if process wf with pig, hive
            flowId = jobInfo.getId();
        } else {
            // if process wf with oozie engine
            flowId = client.getJobInfo(context.getUserSubflowId()).getExternalId();
        }

        copyOozieLog(client, fs, path, flowId);

        WorkflowJob subflowInfo = client.getJobInfo(flowId);
        for (WorkflowAction action : subflowInfo.getActions()) {
            if (isActionTypeSupported(action)) {
                LOG.info("Copying hadoop TT log for action: {} of type: {}", action.getName(),
                        action.getType());
                copyTTlogs(fs, path, action);
            } else {
                LOG.info("Ignoring hadoop TT log for non supported action: {} of type: {}",
                        action.getName(), action.getType());
            }
        }
    }

    /**
     * @return {@code true} when an engine name is present but {@link EngineType#fromValue(String)}
     *         yields {@code null} for it
     */
    private boolean notUserWorkflowEngineIsOozie(String userWorkflowEngine) {
        // userWorkflowEngine will be null for replication and "not null" for pig, hive, oozie
        return userWorkflowEngine != null && EngineType.fromValue(userWorkflowEngine) == null;
    }

    /**
     * Fetches the oozie log of job {@code id} and writes it to {@code oozie.log} under {@code path}.
     *
     * @throws OozieClientException if the log cannot be fetched from oozie
     * @throws IOException          if the log cannot be written
     */
    private void copyOozieLog(OozieClient client, FileSystem fs, Path path, String id)
        throws OozieClientException, IOException {
        // Explicit charset: the bytes written must not depend on the platform default encoding.
        InputStream in = new ByteArrayInputStream(client.getJobLog(id).getBytes(LOG_ENCODING));
        OutputStream out = fs.create(new Path(path, "oozie.log"));
        // close=true: copyBytes closes both streams when it finishes or fails.
        IOUtils.copyBytes(in, out, COPY_BUFFER_SIZE, true);
        LOG.info("Copied oozie log to {}", path);
    }

    /**
     * Downloads every task log URL reported for {@code action} and stores each one under
     * {@code path} as {@code <name>_<type>_<FAILED|SUCCEEDED>-<index>.log}, index starting at 1.
     *
     * @throws Exception if a URL cannot be resolved, fetched or written; remaining URLs are
     *                   not attempted
     */
    private void copyTTlogs(FileSystem fs, Path path, WorkflowAction action) throws Exception {
        List<String> ttLogUrls = getTTlogURL(action.getExternalId());
        if (ttLogUrls == null) {
            return;
        }

        int index = 1;
        for (String ttLogURL : ttLogUrls) {
            LOG.info("Fetching log for action: {} from url: {}", action.getExternalId(), ttLogURL);
            Path target = new Path(path, action.getName() + "_" + action.getType() + "_"
                    + getMappedStatus(action.getStatus()) + "-" + index + ".log");

            InputStream in = getURLinputStream(new URL(ttLogURL));
            try {
                OutputStream out = fs.create(target);
                IOUtils.copyBytes(in, out, COPY_BUFFER_SIZE, true);
            } finally {
                // Covers the case where fs.create fails before copyBytes takes ownership of
                // the stream; closing an already closed stream is a no-op.
                IOUtils.closeStream(in);
            }
            LOG.info("Copied log to {}", target);
            index++;
        }
    }

    /**
     * @return {@code true} if the action type is one of pig, hive, java or map-reduce;
     *         {@code false} otherwise, including for a {@code null} type
     */
    private boolean isActionTypeSupported(WorkflowAction action) {
        return SUPPORTED_ACTION_TYPES.contains(action.getType());
    }

    /**
     * Collapses an action status into the label used in log file names.
     *
     * @return {@code "FAILED"} for FAILED, KILLED and ERROR; {@code "SUCCEEDED"} for anything else
     */
    private String getMappedStatus(WorkflowAction.Status status) {
        boolean failed = status == WorkflowAction.Status.FAILED
                || status == WorkflowAction.Status.KILLED
                || status == WorkflowAction.Status.ERROR;
        return failed ? "FAILED" : "SUCCEEDED";
    }

    /**
     * Resolves the task log URLs for the given job id using the retriever selected for the
     * current configuration.
     */
    private List<String> getTTlogURL(String jobId) throws Exception {
        // Build the configuration once and share it between selection and instantiation.
        Configuration conf = getConf();
        TaskLogURLRetriever logRetriever = ReflectionUtils.newInstance(getLogRetrieverClassName(conf), conf);
        return logRetriever.retrieveTaskLogURL(jobId);
    }

    /**
     * Picks the retriever implementation: the YARN one when {@code mapreduce.framework.name}
     * is {@code yarn}, the default one otherwise.
     */
    @SuppressWarnings("unchecked")
    private Class<? extends TaskLogURLRetriever> getLogRetrieverClassName(Configuration conf) {
        if (YARN.equals(conf.get(MAPREDUCE_FRAMEWORK))) {
            return TaskLogRetrieverYarn.class;
        }
        return DefaultTaskLogRetriever.class;
    }

    /**
     * Opens a connection to {@code url} and returns its input stream; the caller must close it.
     */
    private InputStream getURLinputStream(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        // NOTE(review): nothing is ever written to this connection, so setDoOutput(true) looks
        // unnecessary; kept as-is to preserve request behaviour - confirm before removing.
        // NOTE(review): no connect/read timeout is configured here - confirm whether an
        // unresponsive log server can stall the post-processing step.
        connection.setDoOutput(true);
        connection.connect();
        return connection.getInputStream();
    }
}