Example usage for the com.amazonaws.services.elasticmapreduce.model.DescribeJobFlowsRequest constructor DescribeJobFlowsRequest()

Introduction

On this page you can find example usage of the DescribeJobFlowsRequest() constructor of com.amazonaws.services.elasticmapreduce.model.DescribeJobFlowsRequest.

Prototype

public DescribeJobFlowsRequest()

Source Link

Document

Default constructor for DescribeJobFlowsRequest object.

Usage

From source file: org.pentaho.amazon.hive.job.AmazonHiveJobExecutor.java

License:Apache License

/**
 * Executes a Hive job on the AWS Elastic MapReduce service.
 *
 * <p>The method performs these steps in order:
 * <ol>
 *   <li>attaches a file appender named {@code pdi-<entry name>} for this run;</li>
 *   <li>resolves the Hive script ({@code qUrl}): an S3 URL is used as is (with any
 *       {@code ...@s3} prefix stripped), anything else is read through VFS, copied to a
 *       temporary file and uploaded under the staging directory;</li>
 *   <li>creates a new job flow when {@code hadoopJobFlowId} is empty, otherwise reuses it;</li>
 *   <li>adds a "run Hive script" step to the job flow;</li>
 *   <li>when {@code blocking} is set, polls the job flow state until it stops running and,
 *       on {@code FAILED}, writes the step's stdout/stderr from S3 to the error log;</li>
 *   <li>detaches the appender and registers the log file as a result file.</li>
 * </ol>
 *
 * @param result the result object that is marked as failed on error and returned
 * @param arg1 not used by this method
 * @return the same {@code result} instance that was passed in
 * @throws KettleException declared by the signature; failures inside the EMR/S3 section are
 *         caught here and reported through {@code result} and the log instead
 */
public Result execute(Result result, int arg1) throws KettleException {

    // Setup a log file.
    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$
    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        // A missing log file is not fatal: report it and continue without the appender.
        logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.FailedToOpenLogFile", //$NON-NLS-1$
                logFileName, e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        // Create and connect an AWS service.
        AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(awsCredentials);
        AmazonS3 s3Client = new AmazonS3Client(awsCredentials);

        // Get bucket name and S3 URL.
        String stagingBucketName = GetBucketName(stagingDir);
        String stagingS3BucketUrl = "s3://" + stagingBucketName; //$NON-NLS-1$

        // Prepare staging S3 URL for Hive script file.
        String stagingS3qUrl = "";
        if (qUrl.startsWith(S3FileProvider.SCHEME + "://")) { //$NON-NLS-1$

            // If the .q file is in S3, its staging S3 URL is s3://{bucketname}/{path}
            if (qUrl.indexOf("@s3") > 0) { //$NON-NLS-1$
                // Keep only what follows "@s3/" and re-prefix it with the plain scheme.
                stagingS3qUrl = S3FileProvider.SCHEME + "://" + qUrl.substring(qUrl.indexOf("@s3") + 4); //$NON-NLS-1$
            } else {
                stagingS3qUrl = qUrl;
            }

        } else {
            // A local filename is given for the Hive script file. It should be copied to the S3 Log Directory.
            // First, check for the correct protocol. A mismatch is only logged; processing continues.
            if (!qUrl.startsWith("file:")) { //$NON-NLS-1$
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG,
                            "AmazonElasticMapReduceJobExecutor.HiveScriptFilename.Error") + qUrl); //$NON-NLS-1$
                }
            }
            // pull down .q file from VFS
            FileObject qFile = KettleVFS.getFileObject(buildFilename(qUrl));
            File tmpFile = File.createTempFile("customEMR", "q"); //$NON-NLS-1$
            tmpFile.deleteOnExit();
            // Both streams are closed before the upload so the temp file is complete and its
            // handle is released when putObject reads it.
            FileOutputStream tmpFileOut = new FileOutputStream(tmpFile);
            try {
                java.io.InputStream qFileIn = qFile.getContent().getInputStream();
                try {
                    IOUtils.copy(qFileIn, tmpFileOut);
                } finally {
                    qFileIn.close();
                }
            } finally {
                tmpFileOut.close();
            }
            // Get key name for the script file S3 destination. Key is defined as path name after {bucket}/
            String key = GetKeyFromS3Url(stagingDir);
            if (key == null) {
                key = qFile.getName().getBaseName();
            } else {
                key += "/" + qFile.getName().getBaseName(); //$NON-NLS-1$
            }

            // delete the previous .q file in S3 (best effort: a failure is logged, not rethrown)
            try {
                s3Client.deleteObject(stagingBucketName, key);
            } catch (Exception ex) {
                logError(Const.getStackTracker(ex));
            }

            // Put .q file in S3 Log Directory.
            s3Client.putObject(new PutObjectRequest(stagingBucketName, key, tmpFile));
            stagingS3qUrl = stagingS3BucketUrl + "/" + key; //$NON-NLS-1$
        }

        // AWS provides script-runner.jar (in its public bucket), which should be used as a MapReduce jar for Hive EMR
        // job.
        jarUrl = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"; //$NON-NLS-1$

        if (StringUtil.isEmpty(hadoopJobFlowId)) {
            // create an EMR job flow, start a step to setup Hive and get the job flow ID.
            RunJobFlowRequest runJobFlowRequest = createJobFlow();
            RunJobFlowResult runJobFlowResult = emrClient.runJobFlow(runJobFlowRequest);
            hadoopJobFlowId = runJobFlowResult.getJobFlowId();
        }

        // Now EMR job flow is ready to accept a Run Hive Script step.
        // First, prepare a Job Flow ID list.
        List<String> jobFlowIds = new ArrayList<String>();
        jobFlowIds.add(hadoopJobFlowId);

        // Configure a HadoopJarStep.
        String args = "s3://elasticmapreduce/libs/hive/hive-script "
                + "--base-path s3://elasticmapreduce/libs/hive/ --hive-version 0.7 --run-hive-script --args -f "
                + environmentSubstitute(stagingS3qUrl) + " " + environmentSubstitute(cmdLineArgs); //$NON-NLS-1$
        List<StepConfig> steps = ConfigHadoopJarStep(hadoopJobName, jarUrl, args);

        // Add a Run Hive Script step to the existing job flow.
        AddJobFlowStepsRequest addJobFlowStepsRequest = new AddJobFlowStepsRequest();
        addJobFlowStepsRequest.setJobFlowId(hadoopJobFlowId);
        addJobFlowStepsRequest.setSteps(steps);
        emrClient.addJobFlowSteps(addJobFlowStepsRequest);

        // Set a logging interval (seconds); falls back to 10 when the setting is not a number.
        String loggingIntervalS = environmentSubstitute(loggingInterval);
        int logIntv = 10;
        try {
            logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException ex) {
            logError(BaseMessages.getString(PKG, "AmazonElasticMapReduceJobExecutor.LoggingInterval.Error", //$NON-NLS-1$
                    loggingIntervalS));
        }

        // monitor and log if intended.
        if (blocking) {
            try {
                // NOTE(review): monitoring only happens when basic logging is enabled; with a lower
                // log level a blocking entry does not wait for the job flow - confirm this is intended.
                if (log.isBasic()) {

                    String executionState = "RUNNING"; //$NON-NLS-1$

                    while (isRunning(executionState)) {
                        DescribeJobFlowsRequest describeJobFlowsRequest = new DescribeJobFlowsRequest();
                        describeJobFlowsRequest.setJobFlowIds(jobFlowIds);

                        DescribeJobFlowsResult describeJobFlowsResult = emrClient
                                .describeJobFlows(describeJobFlowsRequest);
                        boolean found = false;
                        for (JobFlowDetail jobFlowDetail : describeJobFlowsResult.getJobFlows()) {
                            if (jobFlowDetail.getJobFlowId().equals(hadoopJobFlowId)) {
                                executionState = jobFlowDetail.getExecutionStatusDetail().getState();
                                found = true;
                            }
                        }

                        // The job flow is no longer reported: nothing left to monitor.
                        if (!found) {
                            break;
                        }
                        logBasic(hadoopJobName + " " + BaseMessages.getString(PKG, //$NON-NLS-1$
                                "AmazonElasticMapReduceJobExecutor.JobFlowExecutionStatus", hadoopJobFlowId)
                                + executionState);

                        if (parentJob.isStopped()) {
                            // The parent job was stopped: terminate the job flow unless it must stay alive.
                            if (!alive) {
                                TerminateJobFlowsRequest terminateJobFlowsRequest = new TerminateJobFlowsRequest();
                                terminateJobFlowsRequest.withJobFlowIds(hadoopJobFlowId);
                                emrClient.terminateJobFlows(terminateJobFlowsRequest);
                            }
                            break;
                        }

                        try {
                            if (isRunning(executionState)) {
                                // long arithmetic avoids int overflow for large intervals
                                Thread.sleep(logIntv * 1000L);
                            }
                        } catch (InterruptedException ie) {
                            logError(Const.getStackTracker(ie));
                            // Preserve the interrupt for callers and stop polling; continuing would
                            // make every following sleep fail immediately and hammer the EMR API.
                            Thread.currentThread().interrupt();
                            break;
                        }
                    }

                    if ("FAILED".equalsIgnoreCase(executionState)) { //$NON-NLS-1$
                        result.setStopped(true);
                        result.setNrErrors(1);
                        result.setResult(false);

                        // Surface the failed step's output in the error log.
                        logS3ObjectAsError(s3Client, stagingBucketName, hadoopJobFlowId + "/steps/1/stdout"); //$NON-NLS-1$
                        logS3ObjectAsError(s3Client, stagingBucketName, hadoopJobFlowId + "/steps/1/stderr"); //$NON-NLS-1$
                    }
                }
            } catch (Exception e) {
                logError(e.getMessage(), e);
            }
        }

    } catch (Throwable t) {
        // Any failure marks the result as failed; the throwable is reported through the job log.
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        // Expose the log file written during this run as a result file.
        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}

/**
 * Downloads an S3 object and writes its whole content to the error log, closing the object's
 * content stream afterwards so the underlying connection is released.
 *
 * @param s3Client the S3 client used for the download
 * @param bucketName the bucket that holds the object
 * @param key the object key inside the bucket
 * @throws java.io.IOException if reading or closing the object content fails
 */
private void logS3ObjectAsError(AmazonS3 s3Client, String bucketName, String key) throws java.io.IOException {
    S3Object s3Object = s3Client.getObject(bucketName, key);
    java.io.InputStream content = s3Object.getObjectContent();
    try {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        IOUtils.copy(content, buffer);
        logError(buffer.toString());
    } finally {
        content.close();
    }
}