gobblin.util.HeapDumpForTaskUtils.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.util.HeapDumpForTaskUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import gobblin.configuration.ConfigurationKeys;

/**
 * A utility class for generating the script that moves heap dump .hprof files to HDFS for hadoop tasks
 * when a Java heap out-of-memory error is thrown.
 */
public class HeapDumpForTaskUtils {

    private static final Logger LOG = LoggerFactory.getLogger(HeapDumpForTaskUtils.class);
    private static final String DUMP_FOLDER = "dumps";

    /**
     * Generate the dumpScript, which is used when an OOM error is thrown during task execution.
     * The generated script puts the .hprof file into the DUMP_FOLDER located in the same directory as the
     * dumpScript. The DUMP_FOLDER directory is created if it does not exist yet.
     *
     * <p>If the dumpScript already exists, nothing is written and the chmod command is not run.
     * If writing the script fails, a partially written script is deleted (best effort) and the original
     * exception is rethrown.
     *
     * <p>After the script has been written, the chmod command is run with the script path appended as its
     * last argument, and this method waits for it to finish. A non-zero exit status is logged as a warning
     * but does not fail the call.
     *
     * <p>User needs to add the following options to the task java.opts:
     *
     * -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./heapFileName.hprof -XX:OnOutOfMemoryError=./dumpScriptFileName
     *
     * @param dumpScript The path to the dumpScript, which needs to be added to the Distributed cache.
     * To use it, simply put the path of dumpScript to the gobblin config: job.hdfs.files.
     * @param fs File system
     * @param heapFileName the name of the .hprof file.
     * @param chmod chmod for the dump script. For hdfs file, e.g, "hadoop fs -chmod 755"
     * @throws IOException if the script cannot be written, if the chmod command is missing or cannot be
     * started, or if the calling thread is interrupted while waiting for the chmod command
     */
    public static void generateDumpScript(Path dumpScript, FileSystem fs, String heapFileName, String chmod)
            throws IOException {
        if (fs.exists(dumpScript)) {
            LOG.info("Heap dump script already exists: " + dumpScript);
            return;
        }

        try (BufferedWriter scriptWriter = new BufferedWriter(
                new OutputStreamWriter(fs.create(dumpScript), ConfigurationKeys.DEFAULT_CHARSET_ENCODING))) {
            Path dumpDir = new Path(dumpScript.getParent(), DUMP_FOLDER);
            if (!fs.exists(dumpDir)) {
                fs.mkdirs(dumpDir);
            }

            // NOTE(review): "${PWD//\//_}" (replace every '/' in $PWD with '_') is a bash pattern
            // substitution, not POSIX sh. Where /bin/sh is not bash this line may fail with a
            // "bad substitution" error. The script text is intentionally left unchanged here because
            // consumers may depend on its exact content -- confirm before changing the shebang.
            scriptWriter.write("#!/bin/sh\n");
            scriptWriter.write("if [ -n \"$HADOOP_PREFIX\" ]; then\n");
            scriptWriter.write("  ${HADOOP_PREFIX}/bin/hadoop dfs -put " + heapFileName + " " + dumpDir
                    + "/${PWD//\\//_}.hprof\n");
            scriptWriter.write("else\n");
            scriptWriter.write("  ${HADOOP_HOME}/bin/hadoop dfs -put " + heapFileName + " " + dumpDir
                    + "/${PWD//\\//_}.hprof\n");
            scriptWriter.write("fi\n");

        } catch (IOException ioe) {
            LOG.error("Heap dump script is not generated successfully.", ioe);
            // Best-effort cleanup: a failure here must not mask the exception that caused it.
            try {
                if (fs.exists(dumpScript)) {
                    fs.delete(dumpScript, true);
                }
            } catch (IOException cleanupFailure) {
                ioe.addSuppressed(cleanupFailure);
            }
            throw ioe;
        }
        makeExecutable(chmod, dumpScript);
    }

    /**
     * Runs the given chmod command against the dump script and waits for it to complete.
     *
     * <p>The chmod string is split on whitespace (the same tokenization {@link Runtime#exec(String)}
     * applies), while the script path is passed as one single argument so that a path containing
     * whitespace is not broken apart.
     *
     * @param chmod the chmod command without the target path, e.g. "hadoop fs -chmod 755"
     * @param dumpScript the script the command is applied to
     * @throws IOException if chmod is null or blank, the command cannot be started, its output cannot be
     * read, or the calling thread is interrupted while waiting for it
     */
    private static void makeExecutable(String chmod, Path dumpScript) throws IOException {
        List<String> command = new ArrayList<>();
        if (chmod != null) {
            StringTokenizer tokens = new StringTokenizer(chmod);
            while (tokens.hasMoreTokens()) {
                command.add(tokens.nextToken());
            }
        }
        if (command.isEmpty()) {
            // Without this check the script path itself would become the program to execute.
            throw new IOException("No chmod command was supplied for heap dump script " + dumpScript);
        }
        command.add(dumpScript.toString());

        // Merge stderr into stdout so a single reader drains everything the command prints.
        Process process = new ProcessBuilder(command).redirectErrorStream(true).start();
        try {
            StringBuilder output = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), ConfigurationKeys.DEFAULT_CHARSET_ENCODING))) {
                // Drain the output before waiting so the child cannot block on a full pipe.
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line).append('\n');
                }
            }

            int exitCode = process.waitFor();
            if (exitCode != 0) {
                // Only warn: callers never saw chmod failures as exceptions before.
                LOG.warn("Command " + command + " exited with code " + exitCode + ": " + output);
            }
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            InterruptedIOException interrupted =
                    new InterruptedIOException("Interrupted while waiting for command " + command);
            interrupted.initCause(ie);
            throw interrupted;
        } finally {
            // Releases the pipes of the child process; harmless if it has already exited.
            process.destroy();
        }
    }
}