Java tutorial: DataflowPipelineJob in the Google Cloud Dataflow SDK
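The listing below is DataflowPipelineJob.java from the Google Cloud Dataflow Java SDK (package com.google.cloud.dataflow.sdk.runners). A DataflowPipelineJob represents a job submitted to the Dataflow service via DataflowPipelineRunner; the class shows how the SDK polls job status and messages with exponential backoff, waits for a terminal state, cancels a running job, and retrieves aggregator values.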
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.dataflow.sdk.runners;

import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime;

import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.client.util.BackOff;
import com.google.api.client.util.BackOffUtils;
import com.google.api.client.util.NanoClock;
import com.google.api.client.util.Sleeper;
import com.google.api.services.dataflow.Dataflow;
import com.google.api.services.dataflow.model.Job;
import com.google.api.services.dataflow.model.JobMessage;
import com.google.api.services.dataflow.model.JobMetrics;
import com.google.api.services.dataflow.model.MetricUpdate;
import com.google.cloud.dataflow.sdk.PipelineResult;
import com.google.cloud.dataflow.sdk.runners.dataflow.DataflowAggregatorTransforms;
import com.google.cloud.dataflow.sdk.runners.dataflow.DataflowMetricUpdateExtractor;
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
import com.google.cloud.dataflow.sdk.util.FluentBackoff;
import com.google.cloud.dataflow.sdk.util.MapAggregatorValues;
import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
import com.google.common.annotations.VisibleForTesting;

import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import javax.annotation.Nullable;

/**
 * A DataflowPipelineJob represents a job submitted to Dataflow using
 * {@link DataflowPipelineRunner}.
 */
public class DataflowPipelineJob implements PipelineResult {
  private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineJob.class);

  /**
   * The id for the job.
   */
  private String jobId;

  /**
   * Google cloud project to associate this pipeline with.
   */
  private String projectId;

  /**
   * Client for the Dataflow service. This can be used to query the service
   * for information about the job.
   */
  private Dataflow dataflowClient;

  /**
   * The state the job terminated in or {@code null} if the job has not terminated.
   */
  @Nullable
  private State terminalState = null;

  /**
   * The job that replaced this one or {@code null} if the job has not been replaced.
   */
  @Nullable
  private DataflowPipelineJob replacedByJob = null;

  private DataflowAggregatorTransforms aggregatorTransforms;

  /**
   * The metric updates retrieved after the job reached a terminal state.
   */
  private List<MetricUpdate> terminalMetricUpdates;

  /**
   * The polling interval for job status and messages information.
   */
  static final Duration MESSAGES_POLLING_INTERVAL = Duration.standardSeconds(2);
  static final Duration STATUS_POLLING_INTERVAL = Duration.standardSeconds(2);

  static final double DEFAULT_BACKOFF_EXPONENT = 1.5;

  /**
   * The number of polling retries for job status and messages information.
   */
  static final int MESSAGES_POLLING_RETRIES = 11;
  static final int STATUS_POLLING_RETRIES = 4;

  private static final FluentBackoff MESSAGES_BACKOFF_FACTORY =
      FluentBackoff.DEFAULT
          .withInitialBackoff(MESSAGES_POLLING_INTERVAL)
          .withMaxRetries(MESSAGES_POLLING_RETRIES)
          .withExponent(DEFAULT_BACKOFF_EXPONENT);
  protected static final FluentBackoff STATUS_BACKOFF_FACTORY =
      FluentBackoff.DEFAULT
          .withInitialBackoff(STATUS_POLLING_INTERVAL)
          .withMaxRetries(STATUS_POLLING_RETRIES)
          .withExponent(DEFAULT_BACKOFF_EXPONENT);
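  // A note on the schedule these factories produce (assuming FluentBackoff
  // sleeps once per retry, growing each sleep by the exponent): message polling
  // sleeps roughly 2 * (1 + 1.5 + ... + 1.5^10) ≈ 342 s, about 5.7 minutes in
  // total across its 11 retries, while status polling sleeps roughly
  // 2 * (1 + 1.5 + 1.5^2 + 1.5^3) ≈ 16 s across its 4 retries.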
  /**
   * Constructs the job.
   *
   * @param projectId the project id
   * @param jobId the job id
   * @param dataflowClient the client for the Dataflow Service
   */
  public DataflowPipelineJob(String projectId, String jobId, Dataflow dataflowClient,
      DataflowAggregatorTransforms aggregatorTransforms) {
    this.projectId = projectId;
    this.jobId = jobId;
    this.dataflowClient = dataflowClient;
    this.aggregatorTransforms = aggregatorTransforms;
  }

  /**
   * Get the id of this job.
   */
  public String getJobId() {
    return jobId;
  }

  /**
   * Get the project this job exists in.
   */
  public String getProjectId() {
    return projectId;
  }

  /**
   * Returns a new {@link DataflowPipelineJob} for the job that replaced this one, if applicable.
   *
   * @throws IllegalStateException if called before the job has terminated or if the job
   *     terminated but was not updated
   */
  public DataflowPipelineJob getReplacedByJob() {
    if (terminalState == null) {
      throw new IllegalStateException("getReplacedByJob() called before job terminated");
    }
    if (replacedByJob == null) {
      throw new IllegalStateException("getReplacedByJob() called for job that was not replaced");
    }
    return replacedByJob;
  }

  /**
   * Get the Cloud Dataflow API Client used by this job.
   */
  public Dataflow getDataflowClient() {
    return dataflowClient;
  }

  /**
   * Waits for the job to finish and returns the final status.
   *
   * @param timeToWait The time to wait, in units of timeUnit, for the job to finish.
   *     Provide a value less than 1 ms for an infinite wait.
   * @param timeUnit The unit of time for timeToWait.
   * @param messageHandler If non-null, this handler will be invoked for each
   *     batch of messages received.
   * @return The final state of the job, or null on timeout or if the
   *     thread is interrupted.
   * @throws IOException If there is a persistent problem getting job
   *     information.
   * @throws InterruptedException if the thread is interrupted while waiting.
   */
  @Nullable
  public State waitToFinish(
      long timeToWait,
      TimeUnit timeUnit,
      MonitoringUtil.JobMessagesHandler messageHandler)
      throws IOException, InterruptedException {
    Duration duration = Duration.millis(timeUnit.toMillis(timeToWait));
    return waitToFinish(duration, messageHandler, Sleeper.DEFAULT, NanoClock.SYSTEM);
  }
  /**
   * Waits for the job to finish and returns the final status.
   *
   * @param duration The total time to wait for the job to finish.
   *     Provide a value less than 1 ms for an infinite wait.
   * @param messageHandler If non-null, this handler will be invoked for each
   *     batch of messages received.
   * @param sleeper A sleeper to use to sleep between attempts.
   * @param nanoClock A nanoClock used to time the total time taken.
   * @return The final state of the job, or null on timeout or if the
   *     thread is interrupted.
   * @throws IOException If there is a persistent problem getting job
   *     information.
   * @throws InterruptedException if the thread is interrupted while waiting.
   */
  @Nullable
  @VisibleForTesting
  State waitToFinish(
      Duration duration,
      MonitoringUtil.JobMessagesHandler messageHandler,
      Sleeper sleeper,
      NanoClock nanoClock) throws IOException, InterruptedException {
    MonitoringUtil monitor = new MonitoringUtil(projectId, dataflowClient);

    long lastTimestamp = 0;
    BackOff backoff;
    if (!duration.isLongerThan(Duration.ZERO)) {
      backoff = MESSAGES_BACKOFF_FACTORY.backoff();
    } else {
      backoff = MESSAGES_BACKOFF_FACTORY.withMaxCumulativeBackoff(duration).backoff();
    }

    // This function tracks the cumulative time from the *first request* to enforce the
    // wall-clock limit. Any backoff instance could, at best, track the time since the first
    // attempt at a given request. Thus, we need to track the cumulative time ourselves.
    long startNanos = nanoClock.nanoTime();

    State state;
    do {
      // Get the state of the job before listing messages. This ensures we always fetch job
      // messages after the job finishes, so that we have all of them.
      state = getStateWithRetries(STATUS_BACKOFF_FACTORY.withMaxRetries(0).backoff(), sleeper);
      boolean hasError = state == State.UNKNOWN;

      if (messageHandler != null && !hasError) {
        // Process all the job messages that have accumulated so far.
        try {
          List<JobMessage> allMessages = monitor.getJobMessages(jobId, lastTimestamp);
          if (!allMessages.isEmpty()) {
            lastTimestamp =
                fromCloudTime(allMessages.get(allMessages.size() - 1).getTime()).getMillis();
            messageHandler.process(allMessages);
          }
        } catch (GoogleJsonResponseException | SocketTimeoutException e) {
          hasError = true;
          LOG.warn("There were problems getting current job messages: {}.", e.getMessage());
          LOG.debug("Exception information:", e);
        }
      }

      if (!hasError) {
        // We can stop if the job is done.
        if (state.isTerminal()) {
          return state;
        }

        // The job is not done, so we must keep polling.
        backoff.reset();

        // If a total duration for all backoff has been set, update the new cumulative sleep
        // time to be the remaining total backoff duration, stopping if we have already
        // exceeded the allotted time.
        if (duration.isLongerThan(Duration.ZERO)) {
          long nanosConsumed = nanoClock.nanoTime() - startNanos;
          Duration consumed = Duration.millis((nanosConsumed + 999999) / 1000000);
          Duration remaining = duration.minus(consumed);
          if (remaining.isLongerThan(Duration.ZERO)) {
            backoff = MESSAGES_BACKOFF_FACTORY.withMaxCumulativeBackoff(remaining).backoff();
          } else {
            // If there is no time remaining, don't bother backing off.
            backoff = BackOff.STOP_BACKOFF;
          }
        }
      }
    } while (BackOffUtils.next(sleeper, backoff));
    LOG.warn("No terminal state was returned. State value {}", state);
    return null;  // Timed out.
  }

  /**
   * Cancels the job.
   *
   * @throws IOException if there is a problem executing the cancel request.
   */
  public void cancel() throws IOException {
    Job content = new Job();
    content.setProjectId(projectId);
    content.setId(jobId);
    content.setRequestedState("JOB_STATE_CANCELLED");
    dataflowClient.projects().jobs().update(projectId, jobId, content).execute();
  }

  @Override
  public State getState() {
    if (terminalState != null) {
      return terminalState;
    }

    return getStateWithRetries(STATUS_BACKOFF_FACTORY.backoff(), Sleeper.DEFAULT);
  }
  /**
   * Attempts to get the state. Uses exponential backoff on failure, up to the maximum
   * number of attempts permitted by the given {@link BackOff}.
   *
   * @param attempts The {@link BackOff} controlling the number of attempts to make.
   * @param sleeper Object used to do the sleeps between attempts.
   * @return The state of the job, or State.UNKNOWN in case of failure.
   */
  @VisibleForTesting
  State getStateWithRetries(BackOff attempts, Sleeper sleeper) {
    if (terminalState != null) {
      return terminalState;
    }
    try {
      Job job = getJobWithRetries(attempts, sleeper);
      return MonitoringUtil.toState(job.getCurrentState());
    } catch (IOException exn) {
      // The only IOException that getJobWithRetries is permitted to throw is the final
      // IOException that caused the failure of retry. Other exceptions are wrapped in
      // unchecked exceptions and will propagate.
      return State.UNKNOWN;
    }
  }

  /**
   * Attempts to get the underlying {@link Job}. Uses exponential backoff on failure, up to
   * the maximum number of attempts permitted by the given {@link BackOff}.
   *
   * @param backoff the {@link BackOff} used to control retries.
   * @param sleeper Object used to do the sleeps between attempts.
   * @return The underlying {@link Job} object.
   * @throws IOException When the maximum number of retries is exhausted, the last exception
   *     is thrown.
   */
  private Job getJobWithRetries(BackOff backoff, Sleeper sleeper) throws IOException {
    // Retry loop ends in return or throw
    while (true) {
      try {
        Job job = dataflowClient.projects().jobs().get(projectId, jobId).execute();
        State currentState = MonitoringUtil.toState(job.getCurrentState());
        if (currentState.isTerminal()) {
          terminalState = currentState;
          replacedByJob = new DataflowPipelineJob(
              getProjectId(), job.getReplacedByJobId(), dataflowClient, aggregatorTransforms);
        }
        return job;
      } catch (IOException exn) {
        LOG.warn("There were problems getting current job status: {}.", exn.getMessage());
        LOG.debug("Exception information:", exn);

        if (!nextBackOff(sleeper, backoff)) {
          throw exn;
        }
      }
    }
  }

  /**
   * Identical to {@link BackOffUtils#next} but without checked exceptions.
   */
  private boolean nextBackOff(Sleeper sleeper, BackOff backoff) {
    try {
      return BackOffUtils.next(sleeper, backoff);
    } catch (InterruptedException | IOException e) {
      if (e instanceof InterruptedException) {
        Thread.currentThread().interrupt();
      }
      throw new RuntimeException(e);
    }
  }

  @Override
  public <OutputT> AggregatorValues<OutputT> getAggregatorValues(Aggregator<?, OutputT> aggregator)
      throws AggregatorRetrievalException {
    try {
      return new MapAggregatorValues<>(fromMetricUpdates(aggregator));
    } catch (IOException e) {
      throw new AggregatorRetrievalException(
          "IOException when retrieving Aggregator values for Aggregator " + aggregator, e);
    }
  }

  private <OutputT> Map<String, OutputT> fromMetricUpdates(Aggregator<?, OutputT> aggregator)
      throws IOException {
    if (aggregatorTransforms.contains(aggregator)) {
      List<MetricUpdate> metricUpdates;
      if (terminalMetricUpdates != null) {
        metricUpdates = terminalMetricUpdates;
      } else {
        boolean terminal = getState().isTerminal();
        JobMetrics jobMetrics =
            dataflowClient.projects().jobs().getMetrics(projectId, jobId).execute();
        metricUpdates = jobMetrics.getMetrics();
        if (terminal && jobMetrics.getMetrics() != null) {
          terminalMetricUpdates = metricUpdates;
        }
      }

      return DataflowMetricUpdateExtractor.fromMetricUpdates(
          aggregator, aggregatorTransforms, metricUpdates);
    } else {
      throw new IllegalArgumentException(
          "Aggregator " + aggregator + " is not used in this pipeline");
    }
  }
}
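To see the class in action, here is a minimal sketch of blocking on a submitted job until it finishes. The client construction, application name, project id, and job id below are illustrative assumptions, not values from the SDK; only the DataflowPipelineJob API itself comes from the listing above.

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.dataflow.Dataflow;
import com.google.api.services.dataflow.DataflowScopes;
import com.google.cloud.dataflow.sdk.PipelineResult;
import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;

import java.util.concurrent.TimeUnit;

public class WaitForJobExample {
  public static void main(String[] args) throws Exception {
    // Build an authorized Dataflow API client using application default
    // credentials (standard Google API client setup).
    Dataflow dataflow = new Dataflow.Builder(
            GoogleNetHttpTransport.newTrustedTransport(),
            JacksonFactory.getDefaultInstance(),
            GoogleCredential.getApplicationDefault().createScoped(DataflowScopes.all()))
        .setApplicationName("dataflow-job-monitor")  // hypothetical application name
        .build();

    // Hypothetical project and job ids. Aggregator transforms are only needed
    // for getAggregatorValues(), so null suffices for plain status polling.
    DataflowPipelineJob job = new DataflowPipelineJob(
        "my-project", "2015-09-15_12_34_56-1234567890", dataflow, null);

    // Block for up to 10 minutes with no message handler. Returns the terminal
    // state, or null on timeout or interruption.
    PipelineResult.State finalState = job.waitToFinish(10, TimeUnit.MINUTES, null);
    if (finalState == null) {
      job.cancel();  // Requests JOB_STATE_CANCELLED via a job update.
    } else {
      System.out.println("Job finished in state: " + finalState);
    }
  }
}

To stream progress messages while waiting, pass an implementation of MonitoringUtil.JobMessagesHandler instead of null; as the listing shows, its process method is invoked with each new batch of JobMessage objects.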