Example usage for java.util.concurrent.TimeoutException(String message)

List of usage examples for the java.util.concurrent.TimeoutException(String) constructor

Introduction

On this page you can find example usage for the java.util.concurrent.TimeoutException constructor that takes a detail message.

Prototype

public TimeoutException(String message) 

Document

Constructs a TimeoutException with the specified detail message.
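
Before the examples, here is a minimal, self-contained sketch of constructing and throwing the exception. The class and constructor are standard JDK API; the helper method and its names are made up for illustration.

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class TimeoutExceptionExample {

    // Hypothetical helper: fails with a descriptive message once a deadline passes.
    static void checkDeadline(long deadlineMillis, String taskName) throws TimeoutException {
        if (System.currentTimeMillis() >= deadlineMillis) {
            throw new TimeoutException(
                    String.format("Task '%s' did not complete before its deadline", taskName));
        }
    }

    public static void main(String[] args) throws TimeoutException {
        long deadline = System.currentTimeMillis() + TimeUnit.SECONDS.toMillis(5);
        checkDeadline(deadline, "example-task"); // passes; the deadline is 5 seconds away
    }
}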

Usage

From source file:com.vmware.photon.controller.common.dcp.ServiceHostUtils.java

public static <H extends ServiceHost> void deleteAllDocuments(H host, String referrer, long timeout,
        TimeUnit timeUnit) throws Throwable {
    QueryTask.Query selfLinkClause = new QueryTask.Query()
            .setTermPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK).setTermMatchValue("*")
            .setTermMatchType(QueryTask.QueryTerm.MatchType.WILDCARD);

    QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
    querySpecification.query.addBooleanClause(selfLinkClause);
    QueryTask queryTask = QueryTask.create(querySpecification).setDirect(true);

    NodeGroupBroadcastResponse queryResponse = ServiceHostUtils.sendBroadcastQueryAndWait(host, referrer,
            queryTask);

    Set<String> documentLinks = QueryTaskUtils.getBroadcastQueryResults(queryResponse);

    if (documentLinks == null || documentLinks.size() <= 0) {
        return;
    }

    CountDownLatch latch = new CountDownLatch(1);

    OperationJoin.JoinedCompletionHandler handler = new OperationJoin.JoinedCompletionHandler() {
        @Override
        public void handle(Map<Long, Operation> ops, Map<Long, Throwable> failures) {
            if (failures != null && !failures.isEmpty()) {
                for (Throwable e : failures.values()) {
                    logger.error("deleteAllDocuments failed", e);
                }
            }
            latch.countDown();
        }
    };

    Collection<Operation> deletes = new LinkedList<>();
    for (String documentLink : documentLinks) {
        Operation deleteOperation = Operation.createDelete(UriUtils.buildUri(host, documentLink)).setBody("{}")
                .setReferer(UriUtils.buildUri(host, referrer));
        deletes.add(deleteOperation);
    }

    OperationJoin join = OperationJoin.create(deletes);
    join.setCompletion(handler);
    join.sendWith(host);
    if (!latch.await(timeout, timeUnit)) {
        throw new TimeoutException(String
                .format("Deletion of all documents timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, timeUnit));
    }
}
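
The pattern above, waiting on a CountDownLatch and converting a false return from await into a TimeoutException, can be distilled into a small stand-alone sketch. The class and method names here are illustrative, not from the Photon Controller source.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class LatchTimeoutExample {

    // await() returns false when the timeout elapses before the count reaches zero.
    static void awaitOrTimeout(CountDownLatch latch, long timeout, TimeUnit unit)
            throws InterruptedException, TimeoutException {
        if (!latch.await(timeout, unit)) {
            throw new TimeoutException(String
                    .format("Operation timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, unit));
        }
    }

    public static void main(String[] args) throws Exception {
        CountDownLatch latch = new CountDownLatch(1);
        new Thread(latch::countDown).start(); // the "work" completes almost immediately
        awaitOrTimeout(latch, 5, TimeUnit.SECONDS);
    }
}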

From source file:brooklyn.util.internal.ssh.sshj.SshjTool.java

/**
 * Executes the script in the background (`nohup ... &`), and then executes other ssh commands to poll for the
 * stdout, stderr and exit code of that original process (which will each have been written to separate files).
 *
 * The polling is a "long poll". That is, it executes a long-running ssh command to retrieve the stdout, etc.
 * If that long-poll command fails, then we just execute another one to pick up from where it left off.
 * This means we do not need to execute many ssh commands (which are expensive), but can still return promptly
 * when the command completes.
 * 
 * Much of this was motivated by https://issues.apache.org/jira/browse/BROOKLYN-106, which is no longer
 * an issue. The retries (e.g. in the upload-script) are arguably overkill given that {@link #acquire(SshAction)}
 * will already retry. However, leaving this in place as it could prove useful when working with flakey
 * networks in the future.
 * 
 * TODO There are (probably) issues with this method when using {@link ShellTool#PROP_RUN_AS_ROOT}.
 * I (Aled) saw the .pid file having an owner of root:root, and a failure message in stderr of:
 *   -bash: line 3: /tmp/brooklyn-20150113-161203056-XMEo-move_install_dir_from_user_to_.pid: Permission denied
 */
protected int execScriptAsyncAndPoll(final Map<String, ?> props, final List<String> commands,
        final Map<String, ?> env) {
    return new ToolAbstractAsyncExecScript(props) {
        private int maxConsecutiveSshFailures = 3;
        private Duration maxDelayBetweenPolls = Duration.seconds(20);
        private Duration pollTimeout = getOptionalVal(props, PROP_EXEC_ASYNC_POLLING_TIMEOUT,
                Duration.FIVE_MINUTES);
        private int iteration = 0;
        private int consecutiveSshFailures = 0;
        private int stdoutCount = 0;
        private int stderrCount = 0;
        private Stopwatch timer;

        public int run() {
            timer = Stopwatch.createStarted();
            final String scriptContents = toScript(props, commands, env);
            if (LOG.isTraceEnabled())
                LOG.trace("Running shell command at {} as async script: {}", host, scriptContents);

            // Upload script; try repeatedly because have seen timeout intermittently on vcloud-director (BROOKLYN-106 related).
            boolean uploadSuccess = Repeater
                    .create("async script upload on " + SshjTool.this.toString() + " (for " + getSummary()
                            + ")")
                    .backoffTo(maxDelayBetweenPolls).limitIterationsTo(3).rethrowException()
                    .until(new Callable<Boolean>() {
                        @Override
                        public Boolean call() throws Exception {
                            iteration++;
                            if (LOG.isDebugEnabled()) {
                                String msg = "Uploading (iteration=" + iteration + ") for async script on "
                                        + SshjTool.this.toString() + " (for " + getSummary() + ")";
                                if (iteration == 1) {
                                    LOG.trace(msg);
                                } else {
                                    LOG.debug(msg);
                                }
                            }
                            copyToServer(ImmutableMap.of("permissions", "0700"), scriptContents.getBytes(),
                                    scriptPath);
                            return true;
                        }
                    }).run();

            if (!uploadSuccess) {
                // Unexpected! Should have either returned true or have rethrown the exception; should never get false.
                String msg = "Unexpected state: repeated failure for async script upload on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")";
                LOG.warn(msg + "; rethrowing");
                throw new IllegalStateException(msg);
            }

            // Execute script asynchronously
            int execResult = asInt(acquire(new ShellAction(buildRunScriptCommand(), out, err, execTimeout)),
                    -1);
            if (execResult != 0)
                return execResult;

            // Long polling to get the status
            try {
                final AtomicReference<Integer> result = new AtomicReference<Integer>();
                boolean success = Repeater
                        .create("async script long-poll on " + SshjTool.this.toString() + " (for "
                                + getSummary() + ")")
                        .backoffTo(maxDelayBetweenPolls).limitTimeTo(execTimeout)
                        .until(new Callable<Boolean>() {
                            @Override
                            public Boolean call() throws Exception {
                                iteration++;
                                if (LOG.isDebugEnabled())
                                    LOG.debug("Doing long-poll (iteration=" + iteration
                                            + ") for async script to complete on " + SshjTool.this.toString()
                                            + " (for " + getSummary() + ")");
                                Integer exitstatus = longPoll();
                                result.set(exitstatus);
                                return exitstatus != null;
                            }
                        }).run();

                if (!success) {
                    // Timed out
                    String msg = "Timeout for async script to complete on " + SshjTool.this.toString() + " ("
                            + getSummary() + ")";
                    LOG.warn(msg + "; rethrowing");
                    throw new TimeoutException(msg);
                }

                return result.get();

            } catch (Exception e) {
                LOG.debug("Problem polling for async script on " + SshjTool.this.toString() + " (for "
                        + getSummary() + "); rethrowing after deleting temporary files", e);
                throw Exceptions.propagate(e);
            } finally {
                // Delete the temporary files created (and the `tail -c` commands that might have been left behind by long-polls).
                // Using pollTimeout so doesn't wait forever, but waits for a reasonable (configurable) length of time.
                // TODO also execute this if the `buildRunScriptCommand` fails, as that might have left files behind?
                try {
                    int execDeleteResult = asInt(
                            acquire(new ShellAction(deleteTemporaryFilesCommand(), out, err, pollTimeout)), -1);
                    if (execDeleteResult != 0) {
                        LOG.debug("Problem deleting temporary files of async script on "
                                + SshjTool.this.toString() + " (for " + getSummary() + "): exit status "
                                + execDeleteResult);
                    }
                } catch (Exception e) {
                    Exceptions.propagateIfFatal(e);
                    LOG.debug("Problem deleting temporary files of async script on " + SshjTool.this.toString()
                            + " (for " + getSummary() + "); continuing", e);
                }
            }
        }

        Integer longPoll() throws IOException {
            // Long-polling to get stdout, stderr + exit status of async task.
            // If our long-poll disconnects, we will just re-execute.
            // We wrap the stdout/stderr so that we can get the size count. 
            // If we disconnect, we will pick up from that char of the stream.
            // TODO Additional stdout/stderr written by buildLongPollCommand() could interfere, 
            //      causing us to miss some characters.
            Duration nextPollTimeout = Duration.min(pollTimeout,
                    Duration.millis(execTimeout.toMilliseconds() - timer.elapsed(TimeUnit.MILLISECONDS)));
            CountingOutputStream countingOut = (out == null) ? null : new CountingOutputStream(out);
            CountingOutputStream countingErr = (err == null) ? null : new CountingOutputStream(err);
            List<String> pollCommand = buildLongPollCommand(stdoutCount, stderrCount, nextPollTimeout);
            Duration sshJoinTimeout = nextPollTimeout.add(Duration.TEN_SECONDS);
            ShellAction action = new ShellAction(pollCommand, countingOut, countingErr, sshJoinTimeout);

            int longPollResult;
            try {
                longPollResult = asInt(acquire(action, 3, nextPollTimeout), -1);
            } catch (RuntimeTimeoutException e) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll timed out on " + SshjTool.this.toString() + " (for " + getSummary()
                            + "): " + e);
                return null;
            }
            stdoutCount += (countingOut == null) ? 0 : countingOut.getCount();
            stderrCount += (countingErr == null) ? 0 : countingErr.getCount();

            if (longPollResult == 0) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll succeeded (exit status 0) on " + SshjTool.this.toString() + " (for "
                            + getSummary() + ")");
                return longPollResult; // success

            } else if (longPollResult == -1) {
                // probably a connection failure; try again
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status -1; will retry on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                return null;

            } else if (longPollResult == 125) {
                // 125 is the special code for timeout in long-poll (see buildLongPollCommand).
                // However, there is a tiny chance that the underlying command might have returned that exact exit code!
                // Don't treat a timeout as a "consecutiveSshFailure".
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status " + longPollResult
                            + "; most likely timeout; retrieving actual status on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                return retrieveStatusCommand();

            } else {
                // want to double-check whether this is the exit-code from the async process, or
                // some unexpected failure in our long-poll command.
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status " + longPollResult
                            + "; retrieving actual status on " + SshjTool.this.toString() + " (for "
                            + getSummary() + ")");
                Integer result = retrieveStatusCommand();
                if (result != null) {
                    return result;
                }
            }

            consecutiveSshFailures++;
            if (consecutiveSshFailures > maxConsecutiveSshFailures) {
                LOG.warn("Aborting on " + consecutiveSshFailures
                        + " consecutive ssh connection errors (return -1) when polling for async script to complete on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")");
                return -1;
            } else {
                LOG.info("Retrying after ssh connection error when polling for async script to complete on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")");
                return null;
            }
        }

        Integer retrieveStatusCommand() throws IOException {
            // want to double-check whether this is the exit-code from the async process, or
            // some unexpected failure in our long-poll command.
            ByteArrayOutputStream statusOut = new ByteArrayOutputStream();
            ByteArrayOutputStream statusErr = new ByteArrayOutputStream();
            int statusResult = asInt(
                    acquire(new ShellAction(buildRetrieveStatusCommand(), statusOut, statusErr, execTimeout)),
                    -1);

            if (statusResult == 0) {
                // The status we retrieved really is valid; return it.
                // TODO How to ensure no additional output in stdout/stderr when parsing below?
                String statusOutStr = new String(statusOut.toByteArray()).trim();
                if (Strings.isEmpty(statusOutStr)) {
                    // suggests not yet completed; will retry with long-poll
                    if (LOG.isDebugEnabled())
                        LOG.debug(
                                "Long-poll retrieved status directly; command successful but no result available on "
                                        + SshjTool.this.toString() + " (for " + getSummary() + ")");
                    return null;
                } else {
                    if (LOG.isDebugEnabled())
                        LOG.debug("Long-poll retrieved status directly; returning '" + statusOutStr + "' on "
                                + SshjTool.this.toString() + " (for " + getSummary() + ")");
                    int result = Integer.parseInt(statusOutStr);
                    return result;
                }

            } else if (statusResult == -1) {
                // probably a connection failure; try again with long-poll
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll retrieving status directly received exit status -1; will retry on "
                            + SshjTool.this.toString() + " (for " + getSummary() + ")");
                return null;

            } else {
                if (out != null) {
                    out.write(toUTF8ByteArray(
                            "retrieving status failed with exit code " + statusResult + " (stdout follow)"));
                    out.write(statusOut.toByteArray());
                }
                if (err != null) {
                    err.write(toUTF8ByteArray(
                            "retrieving status failed with exit code " + statusResult + " (stderr follow)"));
                    err.write(statusErr.toByteArray());
                }

                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll retrieving status failed; returning " + statusResult + " on "
                            + SshjTool.this.toString() + " (for " + getSummary() + ")");
                return statusResult;
            }
        }
    }.run();
}

From source file:org.apache.tinkerpop.gremlin.server.op.AbstractEvalOpProcessor.java

/**
 * Called by {@link #evalOpInternal} when iterating a result set. Implementers should respect the
 * {@link Settings#serializedResponseTimeout} configuration and break the serialization process if
 * it begins to take too long to do so, throwing a {@link java.util.concurrent.TimeoutException} in such
 * cases.
 *
 * @param context The Gremlin Server {@link Context} object containing settings, request message, etc.
 * @param itty The result to iterate
 * @throws TimeoutException if the time taken to serialize the entire result set exceeds the allowable time.
 */
protected void handleIterator(final Context context, final Iterator itty)
        throws TimeoutException, InterruptedException {
    final ChannelHandlerContext ctx = context.getChannelHandlerContext();
    final RequestMessage msg = context.getRequestMessage();
    final Settings settings = context.getSettings();
    final MessageSerializer serializer = ctx.channel().attr(StateKey.SERIALIZER).get();
    final boolean useBinary = ctx.channel().attr(StateKey.USE_BINARY).get();
    boolean warnOnce = false;

    // sessionless requests are always transaction managed, but in-session requests are configurable.
    final boolean managedTransactionsForRequest = manageTransactions ? true
            : (Boolean) msg.getArgs().getOrDefault(Tokens.ARGS_MANAGE_TRANSACTION, false);

    // we have an empty iterator - happens on stuff like: g.V().iterate()
    if (!itty.hasNext()) {
        // as there is nothing left to iterate if we are transaction managed then we should execute a
        // commit here before we send back a NO_CONTENT which implies success
        if (managedTransactionsForRequest)
            attemptCommit(msg, context.getGraphManager(), settings.strictTransactionManagement);
        ctx.writeAndFlush(ResponseMessage.build(msg).code(ResponseStatusCode.NO_CONTENT).create());
        return;
    }

    // timer for the total serialization time
    final StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    // the batch size can be overridden by the request
    final int resultIterationBatchSize = (Integer) msg.optionalArgs(Tokens.ARGS_BATCH_SIZE)
            .orElse(settings.resultIterationBatchSize);
    List<Object> aggregate = new ArrayList<>(resultIterationBatchSize);

    // use an external control to manage the loop as opposed to just checking hasNext() in the while.  this
    // prevents situations where auto transactions create a new transaction after calls to commit() within
    // the loop on calls to hasNext().
    boolean hasMore = itty.hasNext();

    while (hasMore) {
        if (Thread.interrupted())
            throw new InterruptedException();

        // have to check the aggregate size because it is possible that the channel is not writeable (below)
        // so iterating next() if the message is not written and flushed would bump the aggregate size beyond
        // the expected resultIterationBatchSize.  Total serialization time for the response remains in
        // effect so if the client is "slow" it may simply timeout.
        if (aggregate.size() < resultIterationBatchSize)
            aggregate.add(itty.next());

        // send back a page of results if batch size is met or if it's the end of the results being iterated.
        // also check writability of the channel to prevent OOME for slow clients.
        if (ctx.channel().isWritable()) {
            if (aggregate.size() == resultIterationBatchSize || !itty.hasNext()) {
                final ResponseStatusCode code = itty.hasNext() ? ResponseStatusCode.PARTIAL_CONTENT
                        : ResponseStatusCode.SUCCESS;

                // serialize here because in sessionless requests the serialization must occur in the same
                // thread as the eval.  as eval occurs in the GremlinExecutor there's no way to get back to the
                // thread that processed the eval of the script so, we have to push serialization down into that
                Frame frame;
                try {
                    frame = makeFrame(ctx, msg, serializer, useBinary, aggregate, code);
                } catch (Exception ex) {
                    // exception is handled in makeFrame() - serialization error gets written back to driver
                    // at that point
                    if (manageTransactions)
                        attemptRollback(msg, context.getGraphManager(), settings.strictTransactionManagement);
                    break;
                }

                // only need to reset the aggregation list if there's more stuff to write
                if (itty.hasNext())
                    aggregate = new ArrayList<>(resultIterationBatchSize);
                else {
                    // iteration and serialization are both complete which means this finished successfully. note that
                    // errors internal to script eval or timeout will roll back given GremlinServer's global configurations.
                    // local errors will get rolled back below because the exceptions aren't thrown in those cases to be
                    // caught by the GremlinExecutor for global rollback logic. this only needs to be committed if
                    // there are no more items to iterate and serialization is complete
                    if (managedTransactionsForRequest)
                        attemptCommit(msg, context.getGraphManager(), settings.strictTransactionManagement);

                    // exit the result iteration loop as there are no more results left.  using this external control
                    // because of the above commit.  some graphs may open a new transaction on the call to
                    // hasNext()
                    hasMore = false;
                }

                // the flush is called after the commit has potentially occurred.  in this way, if a commit was
                // required then it will be 100% complete before the client receives it. the "frame" at this point
                // should have completely detached objects from the transaction (i.e. serialization has occurred)
                // so a new one should not be opened on the flush down the netty pipeline
                ctx.writeAndFlush(frame);
            }
        } else {
            // don't keep triggering this warning over and over again for the same request
            if (!warnOnce) {
                logger.warn(
                        "Pausing response writing as writeBufferHighWaterMark exceeded on {} - writing will continue once client has caught up",
                        msg);
                warnOnce = true;
            }

            // since the client is lagging we can hold here for a period of time for the client to catch up.
            // this isn't blocking the IO thread - just a worker.
            TimeUnit.MILLISECONDS.sleep(10);
        }

        stopWatch.split();
        if (stopWatch.getSplitTime() > settings.serializedResponseTimeout) {
            final String timeoutMsg = String.format(
                    "Serialization of the entire response exceeded the 'serializeResponseTimeout' setting %s",
                    warnOnce ? "[Gremlin Server paused writes to client as messages were not being consumed quickly enough]"
                            : "");
            throw new TimeoutException(timeoutMsg.trim());
        }

        stopWatch.unsplit();
    }

    stopWatch.stop();
}
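
The split()/unsplit() calls above use Apache Commons Lang's StopWatch to read the cumulative elapsed time at a checkpoint without stopping the watch. A minimal sketch of that timeout check in isolation (the threshold and the per-batch sleep are made-up stand-ins):

import java.util.concurrent.TimeoutException;
import org.apache.commons.lang3.time.StopWatch;

public class SerializationTimeoutExample {

    public static void main(String[] args) throws Exception {
        final long serializedResponseTimeout = 30000; // milliseconds; illustrative value
        final StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        for (int batch = 0; batch < 100; batch++) {
            Thread.sleep(10); // stand-in for serializing one batch of results

            stopWatch.split(); // checkpoint the elapsed time without stopping the watch
            if (stopWatch.getSplitTime() > serializedResponseTimeout) {
                throw new TimeoutException(
                        "Serialization of the entire response exceeded the timeout");
            }
            stopWatch.unsplit();
        }
        stopWatch.stop();
    }
}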

From source file:com.emc.vipr.services.s3.FileAccessTest.java

/**
 * waits until the target access mode is completely transitioned on the specified bucket.
 *
 * @param bucketName bucket name
 * @param targetMode target access mode to wait for (readOnly, readWrite, or disabled). Can be null if target mode
 *                   is unknown (if you're disabling a portion of the bucket and don't know if there
 *                   are still exported objects)
 * @param timeout    after the specified number of seconds, this method will throw a TimeoutException
 * @throws InterruptedException if interrupted while sleeping between GET intervals
 * @throws TimeoutException     if the specified timeout is reached before transition is complete
 */
protected void waitForTransition(String bucketName, ViPRConstants.FileAccessMode targetMode, int timeout)
        throws InterruptedException, TimeoutException {
    if (targetMode != null && targetMode.isTransitionState())
        throw new IllegalArgumentException("Invalid target mode: " + targetMode);
    long start = System.currentTimeMillis(), interval = 500;
    timeout *= 1000; // convert the timeout from seconds to milliseconds
    while (true) {
        // GET the current access mode
        BucketFileAccessModeResult result = viprS3.getBucketFileAccessMode(bucketName);

        if (targetMode == null) {
            if (!result.getAccessMode().isTransitionState()) {
                return; // must be complete since the bucket is not in a transition state
            }
        } else {
            if (targetMode == result.getAccessMode()) {
                return; // transition is complete
            }

            if (!result.getAccessMode().isTransitionState()
                    || !result.getAccessMode().transitionsToTarget(targetMode))
                throw new RuntimeException(String.format("Bucket %s in mode %s will never get to mode %s",
                        bucketName, result.getAccessMode(), targetMode));
        }

        // if we've reached our timeout
        long runTime = System.currentTimeMillis() - start;
        if (runTime >= timeout)
            throw new TimeoutException(String.format(
                    "Access mode transition for %s took longer than %d seconds", bucketName, timeout / 1000));

        // transitioning; wait and query again
        long timeLeft = timeout - runTime;
        Thread.sleep(Math.min(timeLeft, interval));
    }
}
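
A hypothetical call, waiting up to a minute for a bucket to finish switching to read-only mode (the bucket name is made up):

waitForTransition("fileaccess-test-bucket", ViPRConstants.FileAccessMode.readOnly, 60);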

From source file:com.linkedin.databus2.core.container.netty.ServerContainer.java

public void awaitShutdown(long timeoutMs) throws TimeoutException, InterruptedException {
    long startTs = System.currentTimeMillis();
    long endTs = startTs + timeoutMs;
    _controlLock.lock();
    try {
        long waitTime;
        while (!_shutdownRequest && (waitTime = endTs - System.currentTimeMillis()) > 0) {
            LOG.info("waiting for shutdown request for container id: " + _containerStaticConfig.getId());
            if (!_shutdownCondition.await(waitTime, TimeUnit.MILLISECONDS))
                break;
        }
    } finally {
        _controlLock.unlock();
    }

    if (!_shutdownRequest) {
        LOG.error("timeout waiting for a shutdown request");
        throw new TimeoutException("timeout waiting for shutdown request");
    }

    _controlLock.lock();
    try {
        long waitTime;
        while (!_shutdown && (waitTime = endTs - System.currentTimeMillis()) > 0) {
            LOG.info("Waiting for shutdown complete for serving container: " + _containerStaticConfig.getId());
            if (!_shutdownFinishedCondition.await(waitTime, TimeUnit.MILLISECONDS))
                break;
        }
    } finally {
        _controlLock.unlock();
    }

    if (!_shutdown) {
        LOG.error("timeout waiting for shutdown");
        throw new TimeoutException("timeout waiting for shutdown to complete");
    }
}
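
The same two-phase structure, a Lock-guarded Condition awaited in a loop against a fixed deadline, looks like this in isolation. This is a minimal sketch with illustrative field names, not the Databus source.

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

public class ConditionDeadlineExample {
    private final Lock lock = new ReentrantLock();
    private final Condition done = lock.newCondition();
    private boolean finished; // set to true (under the lock) by whoever completes the work

    public void awaitDone(long timeoutMs) throws InterruptedException, TimeoutException {
        long endTs = System.currentTimeMillis() + timeoutMs;
        lock.lock();
        try {
            long waitTime;
            // The loop guards against spurious wakeups and recomputes the remaining wait each pass.
            while (!finished && (waitTime = endTs - System.currentTimeMillis()) > 0) {
                if (!done.await(waitTime, TimeUnit.MILLISECONDS)) {
                    break; // the full remaining time elapsed without a signal
                }
            }
            if (!finished) {
                throw new TimeoutException("timeout waiting for completion");
            }
        } finally {
            lock.unlock();
        }
    }
}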

From source file:org.cloudifysource.esc.driver.provisioning.ElasticMachineProvisioningCloudifyAdapter.java

@Override
public GridServiceAgent startMachine(final ExactZonesConfig zones, final GSAReservationId reservationId,
        final long duration, final TimeUnit unit) throws ElasticMachineProvisioningException,
        ElasticGridServiceAgentProvisioningException, InterruptedException, TimeoutException {

    logger.info("Cloudify Adapter is starting a new machine with zones " + zones.getZones()
            + " and reservation id " + reservationId);

    // calculate timeout
    final long end = System.currentTimeMillis() + unit.toMillis(duration);

    // provision the machine
    logger.info("Calling provisioning implementation for new machine");
    MachineDetails machineDetails;
    cloudifyProvisioning.setAdmin(getGlobalAdminInstance(originalESMAdmin));

    final ZonesConfig defaultZones = config.getGridServiceAgentZones();
    logger.fine("default zones = " + defaultZones.getZones());
    if (!defaultZones.isSatisfiedBy(zones)) {
        throw new IllegalArgumentException(
                "The specified zones " + zones + " does not satisfy the configuration zones " + defaultZones);
    }

    String locationId = null;

    logger.fine("searching for cloud specific zone");
    for (final String zone : zones.getZones()) {
        logger.fine("current zone = " + zone);
        if (zone.startsWith(CLOUD_ZONE_PREFIX)) {
            logger.fine("found a zone with " + CLOUD_ZONE_PREFIX + " prefix : " + zone);
            if (locationId == null) {
                locationId = zone.substring(CLOUD_ZONE_PREFIX.length());
                logger.fine("passing locationId to machine provisioning as " + locationId);
            } else {
                throw new IllegalArgumentException(
                        "The specified zones " + zones + " should include only one zone with the "
                                + CLOUD_ZONE_PREFIX + " prefix:" + locationId);
            }
        }
    }

    final MachineStartRequestedCloudifyEvent machineStartEvent = new MachineStartRequestedCloudifyEvent();
    machineStartEvent.setTemplateName(cloudTemplateName);
    machineStartEvent.setLocationId(locationId);
    machineEventListener.elasticMachineProvisioningProgressChanged(machineStartEvent);

    try {
        final ComputeTemplate template = cloud.getCloudCompute().getTemplates().get(this.cloudTemplateName);
        if (locationId == null) {
            locationId = template.getLocationId();
        }

        // This is the call to the actual cloud driver implementation!
        machineDetails = provisionMachine(locationId, reservationId, duration, unit);

        // This is to protect against a bug in the Admin. see CLOUDIFY-1592
        // (https://cloudifysource.atlassian.net/browse/CLOUDIFY-1592)
        if (!machineDetails.isAgentRunning()) {
            validateMachineIp(machineDetails);
        }

        // Auto populate installer configuration with values set in template
        // if they were not previously set.
        if (machineDetails != null && machineDetails.getInstallerConfiguration() == null) {
            machineDetails.setInstallerConfigutation(template.getInstaller());
        }

    } catch (final Exception e) {
        throw new ElasticMachineProvisioningException("Failed to provision machine: " + e.getMessage(), e);
    }

    logger.info("Machine was provisioned by implementation. Machine is: " + machineDetails);

    // which IP should be used in the cluster
    String machineIp;
    if (cloud.getConfiguration().isConnectToPrivateIp()) {
        machineIp = machineDetails.getPrivateAddress();
    } else {
        machineIp = machineDetails.getPublicAddress();
    }
    if (machineIp == null) {
        throw new IllegalStateException(
                "The IP of the new machine is null! Machine Details are: " + machineDetails + " .");
    }

    final MachineStartedCloudifyEvent machineStartedEvent = new MachineStartedCloudifyEvent();
    machineStartedEvent.setMachineDetails(machineDetails);
    machineStartedEvent.setHostAddress(machineIp);
    machineEventListener.elasticMachineProvisioningProgressChanged(machineStartedEvent);

    final GridServiceAgentStartRequestedEvent agentStartEvent = new GridServiceAgentStartRequestedEvent();
    agentStartEvent.setHostAddress(machineIp);
    agentEventListener.elasticGridServiceAgentProvisioningProgressChanged(agentStartEvent);
    final String volumeId = null;
    try {
        // check for timeout
        checkForProvisioningTimeout(end, machineDetails);

        if (machineDetails.isAgentRunning()) {
            logger.info(
                    "Machine provisioning provided a machine and indicated that an agent is already running");
        } else {
            // install gigaspaces and start agent
            logger.info("Cloudify Adapter is installing Cloudify agent with reservation id " + reservationId
                    + " on " + machineIp);
            installAndStartAgent(machineDetails, reservationId, end);
            // check for timeout again - the installation step can also take
            // a while to complete.
            checkForProvisioningTimeout(end, machineDetails);
        }

        // wait for GSA to become available
        logger.info("Cloudify adapter is waiting for GSA on host: " + machineIp + " with reservation id: "
                + reservationId + " to become available");
        final GridServiceAgent gsa = waitForGsa(machineIp, end, reservationId);
        if (gsa == null) {
            // GSA did not start correctly or on time - shutdown the machine
            throw new TimeoutException("New machine was provisioned and Cloudify was installed, "
                    + "but a GSA was not discovered on the new machine: " + machineDetails);
        }

        // TODO: Derive cloudify specific event and include more event details as specified in CLOUDIFY-10651
        agentEventListener.elasticGridServiceAgentProvisioningProgressChanged(
                new GridServiceAgentStartedEvent(machineIp, gsa.getUid()));

        // check that the agent is really started with the expected env variable of the template
        // we inject this variable earlier on to the bootstrap-management.sh script
        if (gsa.getVirtualMachine().getDetails().getEnvironmentVariables()
                .get(CloudifyConstants.GIGASPACES_CLOUD_TEMPLATE_NAME) == null) {
            throw new ElasticGridServiceAgentProvisioningException(
                    "An agent was started, but the property " + CloudifyConstants.GIGASPACES_CLOUD_TEMPLATE_NAME
                            + " was missing from its environment variables.");
        }

        return gsa;
    } catch (final ElasticMachineProvisioningException e) {
        logger.info("ElasticMachineProvisioningException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final ElasticGridServiceAgentProvisioningException e) {
        logger.info("ElasticGridServiceAgentProvisioningException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final TimeoutException e) {
        logger.info("TimeoutException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final InterruptedException e) {
        logger.info("InterruptedException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final Throwable e) {
        logger.info("Unexpected exception occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw new IllegalStateException("Unexpected exception during machine provisioning", e);
    }
}
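
The method above computes a single end timestamp up front and re-checks it between its long-running steps (provisioning, agent installation, GSA discovery). The checkForProvisioningTimeout implementation is not shown in this excerpt, but a helper in that style might look like the following sketch (names and messages are assumptions):

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class DeadlineBudgetExample {

    // Hypothetical helper: throws once the shared deadline has passed.
    static void checkForTimeout(long endMillis, String step) throws TimeoutException {
        if (System.currentTimeMillis() > endMillis) {
            throw new TimeoutException("Provisioning timed out during step: " + step);
        }
    }

    public static void main(String[] args) throws TimeoutException, InterruptedException {
        final long end = System.currentTimeMillis() + TimeUnit.SECONDS.toMillis(2);
        Thread.sleep(100); // stand-in for provisioning the machine
        checkForTimeout(end, "provision");
        Thread.sleep(100); // stand-in for installing and starting the agent
        checkForTimeout(end, "install-agent");
    }
}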

From source file:org.cloudifysource.esc.driver.provisioning.byon.ByonProvisioningDriver.java

private void stopAgentAndWait(final int expectedGsmCount, final String ipAddress)
        throws TimeoutException, InterruptedException {

    if (admin == null) {
        final Integer discoveryPort = getLusPort();
        admin = Utils.getAdminObject(ipAddress, expectedGsmCount, discoveryPort);
    }

    final Map<String, GridServiceAgent> agentsMap = admin.getGridServiceAgents().getHostAddress();
    // GridServiceAgent agent = agentsMap.get(ipAddress);
    GSA agent = null;
    for (final Entry<String, GridServiceAgent> agentEntry : agentsMap.entrySet()) {
        if (IPUtils.isSameIpAddress(agentEntry.getKey(), ipAddress)
                || agentEntry.getKey().equalsIgnoreCase(ipAddress)) {
            agent = ((InternalGridServiceAgent) agentEntry.getValue()).getGSA();
        }
    }

    if (agent != null) {
        logger.info("ByonProvisioningDriver: shutting down agent on server: " + ipAddress);
        try {
            admin.close();
            agent.shutdown();
        } catch (final RemoteException e) {
            if (!NetworkExceptionHelper.isConnectOrCloseException(e)) {
                logger.log(Level.FINER, "Failed to shutdown GSA", e);
                throw new AdminException("Failed to shutdown GSA", e);
            }
        }

        final long end = System.currentTimeMillis()
                + TimeUnit.MINUTES.toMillis(AGENT_SHUTDOWN_TIMEOUT_IN_MINUTES);
        boolean agentUp = isAgentUp(agent);
        while (agentUp && System.currentTimeMillis() < end) {
            logger.fine("next check in " + TimeUnit.MILLISECONDS.toSeconds(THREAD_WAITING_IDLE_TIME_IN_SECS)
                    + " seconds");
            Thread.sleep(TimeUnit.SECONDS.toMillis(THREAD_WAITING_IDLE_TIME_IN_SECS));
            agentUp = isAgentUp(agent);
        }

        // the loop above exits either on success (!agentUp) or because the deadline passed
        if (agentUp) {
            throw new TimeoutException("Agent shutdown timed out (agent IP: " + ipAddress + ")");
        }
    }
}

From source file:com.vmware.photon.controller.common.xenon.ServiceHostUtils.java

public static <H extends ServiceHost> void deleteAllDocuments(H host, String referrer, long timeout,
        TimeUnit timeUnit) throws Throwable {
    QueryTask.Query selfLinkClause = new QueryTask.Query()
            .setTermPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK).setTermMatchValue("/photon/*")
            .setTermMatchType(QueryTask.QueryTerm.MatchType.WILDCARD);

    QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
    querySpecification.query.addBooleanClause(selfLinkClause);
    QueryTask queryTask = QueryTask.create(querySpecification).setDirect(true);

    NodeGroupBroadcastResponse queryResponse = ServiceHostUtils.sendBroadcastQueryAndWait(host, referrer,
            queryTask);

    Set<String> documentLinks = QueryTaskUtils.getBroadcastQueryDocumentLinks(queryResponse);

    if (documentLinks == null || documentLinks.size() <= 0) {
        return;
    }

    CountDownLatch latch = new CountDownLatch(1);

    OperationJoin.JoinedCompletionHandler handler = new OperationJoin.JoinedCompletionHandler() {
        @Override
        public void handle(Map<Long, Operation> ops, Map<Long, Throwable> failures) {
            if (failures != null && !failures.isEmpty()) {
                for (Throwable e : failures.values()) {
                    logger.error("deleteAllDocuments failed", e);
                }
            }
            latch.countDown();
        }
    };

    Collection<Operation> deletes = new LinkedList<>();
    for (String documentLink : documentLinks) {
        Operation deleteOperation = Operation.createDelete(UriUtils.buildUri(host, documentLink)).setBody("{}")
                .setReferer(UriUtils.buildUri(host, referrer));
        deletes.add(deleteOperation);
    }

    OperationJoin join = OperationJoin.create(deletes);
    join.setCompletion(handler);
    join.sendWith(host);
    if (!latch.await(timeout, timeUnit)) {
        throw new TimeoutException(String
                .format("Deletion of all documents timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, timeUnit));
    }
}

From source file:com.microsoft.azurebatch.jenkins.azurebatch.AzureBatchHelper.java

private void waitForPoolReady(String poolId)
        throws BatchErrorException, IOException, InterruptedException, TimeoutException {
    long startTime = System.currentTimeMillis();
    long elapsedTime = 0L;
    boolean poolSteady = false;

    // Wait max 15 minutes for pool to reach steady
    final long maxPoolSteadyWaitTimeInMinutes = 15;
    Logger.log(listener, String.format("Waiting for pool %s steady...", poolId));
    while (elapsedTime < maxPoolSteadyWaitTimeInMinutes * 60 * 1000) {
        CloudPool pool = client.poolOperations().getPool(poolId);
        if (pool.allocationState() == AllocationState.STEADY) {
            poolSteady = true;
            break;
        }

        Thread.sleep(15 * 1000);
        elapsedTime = System.currentTimeMillis() - startTime;
    }

    if (!poolSteady) {
        throw new TimeoutException(String.format("Pool %s is not steady after %d minutes.", poolId,
                maxPoolSteadyWaitTimeInMinutes));
    } else {
        Logger.log(listener, "Pool %s is steady.", poolId);
    }
}

From source file:com.microsoft.azurebatch.jenkins.azurebatch.AzureBatchHelper.java

private void waitForAtLeastOneVmReady(String poolId)
        throws BatchErrorException, IOException, InterruptedException, TimeoutException {
    long startTime = System.currentTimeMillis();
    long elapsedTime = 0L;
    boolean vmReady = false;

    // Wait max 20 minutes for VM to start up
    final long maxVmIdleWaitTimeInMinutes = 20;
    Logger.log(listener, String.format("Waiting for pool %s at least one VM ready...", poolId));
    while (elapsedTime < maxVmIdleWaitTimeInMinutes * 60 * 1000) {

        List<ComputeNode> nodeCollection = client.computeNodeOperations().listComputeNodes(poolId,
                new DetailLevel.Builder().withSelectClause("state")
                        .withFilterClause("state eq 'idle' or state eq 'running'").build());
        for (ComputeNode node : nodeCollection) {
            ComputeNodeState nodeState = node.state();
            if (nodeState == ComputeNodeState.IDLE || nodeState == ComputeNodeState.RUNNING) {
                vmReady = true;
                break;
            }
        }

        if (vmReady) {
            break;
        }

        // sleep only for whatever remains of the 15-second poll interval after the node-listing call
        long nextWaitTime = 15 * 1000 - (System.currentTimeMillis() - startTime - elapsedTime);
        if (nextWaitTime > 0) {
            Thread.sleep(nextWaitTime);
        }

        elapsedTime = System.currentTimeMillis() - startTime;
    }

    if (!vmReady) {
        throw new TimeoutException(
                String.format("No VM in pool %s became ready after %d minutes.", poolId, maxVmIdleWaitTimeInMinutes));
    } else {
        Logger.log(listener, "Pool %s at least one VM is ready.", poolId);
    }
}
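
Both Azure Batch waits share the same shape: poll a condition at an interval, track elapsed time, and throw a TimeoutException when the cap is reached. A generic version of that loop, as a sketch rather than part of the plugin, could look like this:

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;

public class PollUntilExample {

    static void pollUntil(BooleanSupplier condition, long maxWaitMinutes, long intervalMs,
            String description) throws InterruptedException, TimeoutException {
        long startTime = System.currentTimeMillis();
        long maxWaitMs = TimeUnit.MINUTES.toMillis(maxWaitMinutes);
        while (System.currentTimeMillis() - startTime < maxWaitMs) {
            if (condition.getAsBoolean()) {
                return;
            }
            Thread.sleep(intervalMs);
        }
        throw new TimeoutException(String.format("%s was not reached within %d minutes.",
                description, maxWaitMinutes));
    }

    public static void main(String[] args) throws Exception {
        long readyAt = System.currentTimeMillis() + 200;
        pollUntil(() -> System.currentTimeMillis() >= readyAt, 1, 50, "Test condition");
    }
}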