org.apache.oozie.service.ZKXLogStreamingService.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.oozie.service.ZKXLogStreamingService.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.oozie.service;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Writer;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.lang.StringUtils;
import org.apache.curator.x.discovery.ServiceInstance;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.rest.RestConstants;
import org.apache.oozie.util.AuthUrlClient;
import org.apache.oozie.util.Instrumentable;
import org.apache.oozie.util.Instrumentation;
import org.apache.oozie.util.SimpleTimestampedMessageParser;
import org.apache.oozie.util.TimestampedMessageParser;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.XLogStreamer;
import org.apache.oozie.util.ZKUtils;

/**
 * Service that performs streaming of log files over Web Services if enabled in XLogService and collates logs from other Oozie
 * servers.  Requires that a ZooKeeper ensemble is available.
 */
public class ZKXLogStreamingService extends XLogStreamingService implements Service, Instrumentable {

    private ZKUtils zk;
    private XLog log;

    /**
     * Initialize the log streaming service.
     *
     * @param services services instance.
     * @throws ServiceException thrown if the log streaming service could not be initialized.
     */
    @Override
    public void init(Services services) throws ServiceException {
        super.init(services);
        try {
            zk = ZKUtils.register(this);
        } catch (Exception ex) {
            throw new ServiceException(ErrorCode.E1700, ex.getMessage(), ex);
        }
        log = XLog.getLog(this.getClass());
    }

    /**
     * Destroy the log streaming service.
     */
    @Override
    public void destroy() {
        if (zk != null) {
            zk.unregister(this);
        }
        zk = null;
        super.destroy();
    }

    /**
     * Instruments the log streaming service.
     *
     * @param instr instrumentation to use.
     */
    @Override
    public void instrument(Instrumentation instr) {
        super.instrument(instr);
    }

    /**
     * Stream the log of a job.  It contacts any other running Oozie servers to collate relevant logs while streaming.
     *
     * @param logStreamer the log streamer
     * @param startTime start time for log events to filter.
     * @param endTime end time for log events to filter.
     * @param writer writer to stream the log to.
     * @throws IOException thrown if the log cannot be streamed.
     */
    @Override
    public void streamLog(XLogStreamer logStreamer, Date startTime, Date endTime, Writer writer)
            throws IOException {

        if (!logStreamer.isLogEnabled()) {
            writer.write(logStreamer.getLogDisableMessage());
            return;
        }
        // If ALL_SERVERS_PARAM is set to false, then only stream our log
        if (!Services.get().get(JobsConcurrencyService.class).isAllServerRequest(logStreamer.getRequestParam())) {
            super.streamLog(logStreamer, startTime, endTime, writer, false);
        }
        // Otherwise, we have to go collate relevant logs from the other Oozie servers
        else {
            collateLogs(logStreamer, startTime, endTime, writer);
        }
    }

    /**
     * Contacts each of the other Oozie servers, gets their logs for the job, collates them, and sends them to the user via the
     * Writer.  It will make sure to not read all of the log messages into memory at the same time to not use up the heap.  If there
     * is a problem talking to one of the other servers, it will ignore that server and prepend a message to the Writer about it.
     * For getting the logs from this server, it won't use the REST API and instead get them directly to be more efficient.
     *
     * @param logStreamer the XLogStreamer
     * @param startTime the job start time
     * @param endTime the job end time
     * @param writer the writer
     * @throws IOException Signals that an I/O exception has occurred.
     */
    private void collateLogs(XLogStreamer logStreamer, Date startTime, Date endTime, Writer writer)
            throws IOException {
        List<String> badOozies = new ArrayList<String>();
        List<ServiceInstance<Map>> oozies = null;
        try {
            oozies = zk.getAllMetaData();
        } catch (Exception ex) {
            throw new IOException("Issue communicating with ZooKeeper: " + ex.getMessage(), ex);
        }
        List<TimestampedMessageParser> parsers = new ArrayList<TimestampedMessageParser>(oozies.size());
        try {
            // Create a BufferedReader for getting the logs of each server and put them in a TimestampedMessageParser
            for (ServiceInstance<Map> oozie : oozies) {
                Map<String, String> oozieMeta = oozie.getPayload();
                String otherId = oozieMeta.get(ZKUtils.ZKMetadataKeys.OOZIE_ID);
                // If it's this server, we can just get them directly
                if (otherId.equals(zk.getZKId())) {
                    BufferedReader reader = logStreamer.makeReader(startTime, endTime);
                    parsers.add(new TimestampedMessageParser(reader, logStreamer.getXLogFilter()));
                }
                // If it's another server, we'll have to use the REST API
                else {
                    String otherUrl = oozieMeta.get(ZKUtils.ZKMetadataKeys.OOZIE_URL);
                    String jobId = logStreamer.getXLogFilter().getFilterParams().get(DagXLogInfoService.JOB);
                    try {
                        // It's important that we specify ALL_SERVERS_PARAM=false in the GET request to prevent the other Oozie
                        // Server from trying aggregate logs from the other Oozie servers (and creating an infinite recursion)
                        final String url = otherUrl + "/v" + OozieClient.WS_PROTOCOL_VERSION + "/"
                                + RestConstants.JOB + "/" + jobId + "?" + RestConstants.JOB_SHOW_PARAM + "="
                                + logStreamer.getLogType() + "&" + RestConstants.ALL_SERVER_REQUEST + "=false"
                                + AuthUrlClient.getQueryParamString(logStreamer.getRequestParam());
                        // remove doAs from url to avoid failure while fetching
                        // logs in case of HA mode
                        String key = "doAs";
                        String[] value = null;
                        if (logStreamer.getRequestParam() != null) {
                            value = logStreamer.getRequestParam().get(key);
                        }
                        String urlWithoutdoAs = null;
                        if (value != null && value.length > 0 && value[0] != null && value[0].length() > 0) {
                            urlWithoutdoAs = url.replace("&" + key + "=" + URLEncoder.encode(value[0], "UTF-8"),
                                    "");
                        } else {
                            urlWithoutdoAs = url;
                        }
                        BufferedReader reader = AuthUrlClient.callServer(urlWithoutdoAs);
                        parsers.add(new SimpleTimestampedMessageParser(reader, logStreamer.getXLogFilter()));
                    } catch (IOException ioe) {
                        log.warn(
                                "Failed to retrieve logs for job [" + jobId + "] from Oozie server with ID ["
                                        + otherId + "] at [" + otherUrl + "]; log information may be incomplete",
                                ioe);
                        badOozies.add(otherId);
                    }
                }
            }

            //If log param debug is set, we need to write start date and end date to outputstream.
            if (!StringUtils.isEmpty(logStreamer.getXLogFilter().getTruncatedMessage())) {
                writer.write(logStreamer.getXLogFilter().getTruncatedMessage());
            }

            if (logStreamer.getXLogFilter().isDebugMode()) {
                writer.write(logStreamer.getXLogFilter().getDebugMessage());
            }
            // Add a message about any servers we couldn't contact
            if (!badOozies.isEmpty()) {
                writer.write(
                        "Unable to contact the following Oozie Servers for logs (log information may be incomplete):\n");
                for (String badOozie : badOozies) {
                    writer.write("     ");
                    writer.write(badOozie);
                    writer.write("\n");
                }
                writer.write("\n");
                writer.flush();
            }

            // If it's just the one server (this server), then we don't need to do any more processing and can just copy it directly
            if (parsers.size() == 1) {
                TimestampedMessageParser parser = parsers.get(0);
                parser.processRemaining(writer, logStreamer);
            } else {
                // Now that we have a Reader for each server to get the logs from that server, we have to collate them.  Within each
                // server, the logs should already be in the correct order, so we can take advantage of that.  We'll use the
                // BufferedReaders to read the messages from the logs of each server and put them in order without having to bring
                // every message into memory at the same time.
                TreeMap<String, TimestampedMessageParser> timestampMap = new TreeMap<String, TimestampedMessageParser>();
                // populate timestampMap with initial values
                for (TimestampedMessageParser parser : parsers) {
                    if (parser.increment()) {
                        timestampMap.put(parser.getLastTimestamp(), parser);
                    }
                }
                while (timestampMap.size() > 1) {
                    // The first entry will be the earliest based on the timestamp (also removes it) from the map
                    TimestampedMessageParser earliestParser = timestampMap.pollFirstEntry().getValue();
                    // Write the message from that parser at that timestamp
                    writer.write(earliestParser.getLastMessage());
                    if (logStreamer.shouldFlushOutput(earliestParser.getLastMessage().length())) {
                        writer.flush();
                    }
                    // Increment that parser to read the next message
                    if (earliestParser.increment()) {
                        // If it still has messages left, put it back in the map with the new last timestamp for it
                        timestampMap.put(earliestParser.getLastTimestamp(), earliestParser);
                    }
                }
                // If there's only one parser left in the map, then we can simply copy the rest of its lines directly to be faster
                if (timestampMap.size() == 1) {
                    TimestampedMessageParser parser = timestampMap.values().iterator().next();
                    writer.write(parser.getLastMessage()); // don't forget the last message read by the parser
                    parser.processRemaining(writer, logStreamer);
                }
            }
        } finally {
            for (TimestampedMessageParser parser : parsers) {
                parser.closeReader();
            }
        }
    }
}