List of usage examples for java.util.concurrent.TimeoutException
public TimeoutException(String message)
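Before the source-file examples, a minimal sketch of where this exception most commonly appears in the JDK itself: Future.get(long, TimeUnit) throws TimeoutException when a result is not ready in time. The task body and durations here are illustrative only.

import java.util.concurrent.*;

public class TimeoutExample {
    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        Future<String> future = executor.submit(() -> {
            Thread.sleep(5_000); // simulated slow work
            return "done";
        });
        try {
            // get(timeout, unit) throws TimeoutException if the result is not ready in time
            String result = future.get(1, TimeUnit.SECONDS);
            System.out.println(result);
        } catch (TimeoutException e) {
            future.cancel(true); // interrupt the slow task
            System.err.println("Task timed out");
        } finally {
            executor.shutdownNow();
        }
    }
}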
From source file:com.vmware.photon.controller.common.dcp.ServiceHostUtils.java
public static <H extends ServiceHost> void deleteAllDocuments(H host, String referrer, long timeout,
        TimeUnit timeUnit) throws Throwable {
    QueryTask.Query selfLinkClause = new QueryTask.Query()
            .setTermPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK)
            .setTermMatchValue("*")
            .setTermMatchType(QueryTask.QueryTerm.MatchType.WILDCARD);

    QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
    querySpecification.query.addBooleanClause(selfLinkClause);
    QueryTask queryTask = QueryTask.create(querySpecification).setDirect(true);

    NodeGroupBroadcastResponse queryResponse = ServiceHostUtils.sendBroadcastQueryAndWait(host, referrer,
            queryTask);

    Set<String> documentLinks = QueryTaskUtils.getBroadcastQueryResults(queryResponse);
    if (documentLinks == null || documentLinks.size() <= 0) {
        return;
    }

    CountDownLatch latch = new CountDownLatch(1);
    OperationJoin.JoinedCompletionHandler handler = new OperationJoin.JoinedCompletionHandler() {
        @Override
        public void handle(Map<Long, Operation> ops, Map<Long, Throwable> failures) {
            if (failures != null && !failures.isEmpty()) {
                for (Throwable e : failures.values()) {
                    logger.error("deleteAllDocuments failed", e);
                }
            }
            latch.countDown();
        }
    };

    Collection<Operation> deletes = new LinkedList<>();
    for (String documentLink : documentLinks) {
        Operation deleteOperation = Operation.createDelete(UriUtils.buildUri(host, documentLink))
                .setBody("{}")
                .setReferer(UriUtils.buildUri(host, referrer));
        deletes.add(deleteOperation);
    }

    OperationJoin join = OperationJoin.create(deletes);
    join.setCompletion(handler);
    join.sendWith(host);
    if (!latch.await(timeout, timeUnit)) {
        throw new TimeoutException(String.format(
                "Deletion of all documents timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, timeUnit));
    }
}
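The core timeout mechanism in the example above is reusable on its own: await a CountDownLatch with a deadline and convert a false return into a TimeoutException. A stripped-down sketch (class and method names are illustrative):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class LatchAwait {
    // Blocks until the latch reaches zero, or throws TimeoutException when the wait elapses first.
    public static void awaitOrTimeout(CountDownLatch latch, long timeout, TimeUnit timeUnit)
            throws InterruptedException, TimeoutException {
        // await() returns false if the waiting time elapsed before the count reached zero
        if (!latch.await(timeout, timeUnit)) {
            throw new TimeoutException(String.format(
                    "Operation timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, timeUnit));
        }
    }
}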
From source file:brooklyn.util.internal.ssh.sshj.SshjTool.java
/**
 * Executes the script in the background (`nohup ... &`), and then executes other ssh commands to poll for the
 * stdout, stderr and exit code of that original process (which will each have been written to separate files).
 *
 * The polling is a "long poll". That is, it executes a long-running ssh command to retrieve the stdout, etc.
 * If that long-poll command fails, then we just execute another one to pick up from where it left off.
 * This means we do not need to execute many ssh commands (which are expensive), but can still return promptly
 * when the command completes.
 *
 * Much of this was motivated by https://issues.apache.org/jira/browse/BROOKLYN-106, which is no longer
 * an issue. The retries (e.g. in the upload-script) are arguably overkill given that {@link #acquire(SshAction)}
 * will already retry. However, leaving this in place as it could prove useful when working with flakey
 * networks in the future.
 *
 * TODO There are (probably) issues with this method when using {@link ShellTool#PROP_RUN_AS_ROOT}.
 * I (Aled) saw the .pid file having an owner of root:root, and a failure message in stderr of:
 *     -bash: line 3: /tmp/brooklyn-20150113-161203056-XMEo-move_install_dir_from_user_to_.pid: Permission denied
 */
protected int execScriptAsyncAndPoll(final Map<String, ?> props, final List<String> commands,
        final Map<String, ?> env) {
    return new ToolAbstractAsyncExecScript(props) {
        private int maxConsecutiveSshFailures = 3;
        private Duration maxDelayBetweenPolls = Duration.seconds(20);
        private Duration pollTimeout = getOptionalVal(props, PROP_EXEC_ASYNC_POLLING_TIMEOUT, Duration.FIVE_MINUTES);
        private int iteration = 0;
        private int consecutiveSshFailures = 0;
        private int stdoutCount = 0;
        private int stderrCount = 0;
        private Stopwatch timer;

        public int run() {
            timer = Stopwatch.createStarted();
            final String scriptContents = toScript(props, commands, env);
            if (LOG.isTraceEnabled())
                LOG.trace("Running shell command at {} as async script: {}", host, scriptContents);

            // Upload script; try repeatedly because have seen timeout intermittently on vcloud-director (BROOKLYN-106 related).
            boolean uploadSuccess = Repeater
                    .create("async script upload on " + SshjTool.this.toString() + " (for " + getSummary() + ")")
                    .backoffTo(maxDelayBetweenPolls)
                    .limitIterationsTo(3)
                    .rethrowException()
                    .until(new Callable<Boolean>() {
                        @Override
                        public Boolean call() throws Exception {
                            iteration++;
                            if (LOG.isDebugEnabled()) {
                                String msg = "Uploading (iteration=" + iteration + ") for async script on "
                                        + SshjTool.this.toString() + " (for " + getSummary() + ")";
                                if (iteration == 1) {
                                    LOG.trace(msg);
                                } else {
                                    LOG.debug(msg);
                                }
                            }
                            copyToServer(ImmutableMap.of("permissions", "0700"), scriptContents.getBytes(), scriptPath);
                            return true;
                        }
                    })
                    .run();
            if (!uploadSuccess) {
                // Unexpected! Should have either returned true or have rethrown the exception; should never get false.
                String msg = "Unexpected state: repeated failure for async script upload on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")";
                LOG.warn(msg + "; rethrowing");
                throw new IllegalStateException(msg);
            }

            // Execute script asynchronously
            int execResult = asInt(acquire(new ShellAction(buildRunScriptCommand(), out, err, execTimeout)), -1);
            if (execResult != 0)
                return execResult;

            // Long polling to get the status
            try {
                final AtomicReference<Integer> result = new AtomicReference<Integer>();
                boolean success = Repeater
                        .create("async script long-poll on " + SshjTool.this.toString() + " (for " + getSummary() + ")")
                        .backoffTo(maxDelayBetweenPolls)
                        .limitTimeTo(execTimeout)
                        .until(new Callable<Boolean>() {
                            @Override
                            public Boolean call() throws Exception {
                                iteration++;
                                if (LOG.isDebugEnabled())
                                    LOG.debug("Doing long-poll (iteration=" + iteration + ") for async script to complete on "
                                            + SshjTool.this.toString() + " (for " + getSummary() + ")");
                                Integer exitstatus = longPoll();
                                result.set(exitstatus);
                                return exitstatus != null;
                            }
                        })
                        .run();
                if (!success) {
                    // Timed out
                    String msg = "Timeout for async script to complete on " + SshjTool.this.toString()
                            + " (" + getSummary() + ")";
                    LOG.warn(msg + "; rethrowing");
                    throw new TimeoutException(msg);
                }
                return result.get();
            } catch (Exception e) {
                LOG.debug("Problem polling for async script on " + SshjTool.this.toString() + " (for "
                        + getSummary() + "); rethrowing after deleting temporary files", e);
                throw Exceptions.propagate(e);
            } finally {
                // Delete the temporary files created (and the `tail -c` commands that might have been left behind by long-polls).
                // Using pollTimeout so doesn't wait forever, but waits for a reasonable (configurable) length of time.
                // TODO also execute this if the `buildRunScriptCommand` fails, as that might have left files behind?
                try {
                    int execDeleteResult = asInt(
                            acquire(new ShellAction(deleteTemporaryFilesCommand(), out, err, pollTimeout)), -1);
                    if (execDeleteResult != 0) {
                        LOG.debug("Problem deleting temporary files of async script on " + SshjTool.this.toString()
                                + " (for " + getSummary() + "): exit status " + execDeleteResult);
                    }
                } catch (Exception e) {
                    Exceptions.propagateIfFatal(e);
                    LOG.debug("Problem deleting temporary files of async script on " + SshjTool.this.toString()
                            + " (for " + getSummary() + "); continuing", e);
                }
            }
        }

        Integer longPoll() throws IOException {
            // Long-polling to get stdout, stderr + exit status of async task.
            // If our long-poll disconnects, we will just re-execute.
            // We wrap the stdout/stderr so that we can get the size count.
            // If we disconnect, we will pick up from that char of the stream.
            // TODO Additional stdout/stderr written by buildLongPollCommand() could interfere,
            //      causing us to miss some characters.
            Duration nextPollTimeout = Duration.min(pollTimeout,
                    Duration.millis(execTimeout.toMilliseconds() - timer.elapsed(TimeUnit.MILLISECONDS)));
            CountingOutputStream countingOut = (out == null) ? null : new CountingOutputStream(out);
            CountingOutputStream countingErr = (err == null) ? null : new CountingOutputStream(err);
            List<String> pollCommand = buildLongPollCommand(stdoutCount, stderrCount, nextPollTimeout);
            Duration sshJoinTimeout = nextPollTimeout.add(Duration.TEN_SECONDS);
            ShellAction action = new ShellAction(pollCommand, countingOut, countingErr, sshJoinTimeout);

            int longPollResult;
            try {
                longPollResult = asInt(acquire(action, 3, nextPollTimeout), -1);
            } catch (RuntimeTimeoutException e) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll timed out on " + SshjTool.this.toString() + " (for " + getSummary() + "): " + e);
                return null;
            }
            stdoutCount += (countingOut == null) ? 0 : countingOut.getCount();
            stderrCount += (countingErr == null) ? 0 : countingErr.getCount();

            if (longPollResult == 0) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll succeeded (exit status 0) on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                return longPollResult; // success
            } else if (longPollResult == -1) {
                // probably a connection failure; try again
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status -1; will retry on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                return null;
            } else if (longPollResult == 125) {
                // 125 is the special code for timeout in long-poll (see buildLongPollCommand).
                // However, there is a tiny chance that the underlying command might have returned that exact exit code!
                // Don't treat a timeout as a "consecutiveSshFailure".
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status " + longPollResult
                            + "; most likely timeout; retrieving actual status on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                return retrieveStatusCommand();
            } else {
                // want to double-check whether this is the exit-code from the async process, or
                // some unexpected failure in our long-poll command.
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status " + longPollResult
                            + "; retrieving actual status on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                Integer result = retrieveStatusCommand();
                if (result != null) {
                    return result;
                }
            }

            consecutiveSshFailures++;
            if (consecutiveSshFailures > maxConsecutiveSshFailures) {
                LOG.warn("Aborting on " + consecutiveSshFailures
                        + " consecutive ssh connection errors (return -1) when polling for async script to complete on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")");
                return -1;
            } else {
                LOG.info("Retrying after ssh connection error when polling for async script to complete on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")");
                return null;
            }
        }

        Integer retrieveStatusCommand() throws IOException {
            // want to double-check whether this is the exit-code from the async process, or
            // some unexpected failure in our long-poll command.
            ByteArrayOutputStream statusOut = new ByteArrayOutputStream();
            ByteArrayOutputStream statusErr = new ByteArrayOutputStream();
            int statusResult = asInt(
                    acquire(new ShellAction(buildRetrieveStatusCommand(), statusOut, statusErr, execTimeout)), -1);

            if (statusResult == 0) {
                // The status we retrieved really is valid; return it.
                // TODO How to ensure no additional output in stdout/stderr when parsing below?
                String statusOutStr = new String(statusOut.toByteArray()).trim();
                if (Strings.isEmpty(statusOutStr)) {
                    // suggests not yet completed; will retry with long-poll
                    if (LOG.isDebugEnabled())
                        LOG.debug("Long-poll retrieved status directly; command successful but no result available on "
                                + SshjTool.this.toString() + " (for " + getSummary() + ")");
                    return null;
                } else {
                    if (LOG.isDebugEnabled())
                        LOG.debug("Long-poll retrieved status directly; returning '" + statusOutStr + "' on "
                                + SshjTool.this.toString() + " (for " + getSummary() + ")");
                    int result = Integer.parseInt(statusOutStr);
                    return result;
                }
            } else if (statusResult == -1) {
                // probably a connection failure; try again with long-poll
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll retrieving status directly received exit status -1; will retry on "
                            + SshjTool.this.toString() + " (for " + getSummary() + ")");
                return null;
            } else {
                if (out != null) {
                    out.write(toUTF8ByteArray(
                            "retrieving status failed with exit code " + statusResult + " (stdout follow)"));
                    out.write(statusOut.toByteArray());
                }
                if (err != null) {
                    err.write(toUTF8ByteArray(
                            "retrieving status failed with exit code " + statusResult + " (stderr follow)"));
                    err.write(statusErr.toByteArray());
                }
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll retrieving status failed; returning " + statusResult + " on "
                            + SshjTool.this.toString() + " (for " + getSummary() + ")");
                return statusResult;
            }
        }
    }.run();
}
From source file:org.apache.tinkerpop.gremlin.server.op.AbstractEvalOpProcessor.java
/**
 * Called by {@link #evalOpInternal} when iterating a result set. Implementers should respect the
 * {@link Settings#serializedResponseTimeout} configuration and break the serialization process if
 * it begins to take too long to do so, throwing a {@link java.util.concurrent.TimeoutException} in such
 * cases.
 *
 * @param context The Gremlin Server {@link Context} object containing settings, request message, etc.
 * @param itty The result to iterate
 * @throws TimeoutException if the time taken to serialize the entire result set exceeds the allowable time.
 */
protected void handleIterator(final Context context, final Iterator itty)
        throws TimeoutException, InterruptedException {
    final ChannelHandlerContext ctx = context.getChannelHandlerContext();
    final RequestMessage msg = context.getRequestMessage();
    final Settings settings = context.getSettings();
    final MessageSerializer serializer = ctx.channel().attr(StateKey.SERIALIZER).get();
    final boolean useBinary = ctx.channel().attr(StateKey.USE_BINARY).get();
    boolean warnOnce = false;

    // sessionless requests are always transaction managed, but in-session requests are configurable.
    final boolean managedTransactionsForRequest = manageTransactions ? true
            : (Boolean) msg.getArgs().getOrDefault(Tokens.ARGS_MANAGE_TRANSACTION, false);

    // we have an empty iterator - happens on stuff like: g.V().iterate()
    if (!itty.hasNext()) {
        // as there is nothing left to iterate, if we are transaction managed then we should execute a
        // commit here before we send back a NO_CONTENT which implies success
        if (managedTransactionsForRequest)
            attemptCommit(msg, context.getGraphManager(), settings.strictTransactionManagement);
        ctx.writeAndFlush(ResponseMessage.build(msg).code(ResponseStatusCode.NO_CONTENT).create());
        return;
    }

    // timer for the total serialization time
    final StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    // the batch size can be overridden by the request
    final int resultIterationBatchSize = (Integer) msg.optionalArgs(Tokens.ARGS_BATCH_SIZE)
            .orElse(settings.resultIterationBatchSize);
    List<Object> aggregate = new ArrayList<>(resultIterationBatchSize);

    // use an external control to manage the loop as opposed to just checking hasNext() in the while. this
    // prevents situations where auto transactions create a new transaction after calls to commit() within
    // the loop on calls to hasNext().
    boolean hasMore = itty.hasNext();

    while (hasMore) {
        if (Thread.interrupted())
            throw new InterruptedException();

        // have to check the aggregate size because it is possible that the channel is not writeable (below)
        // so iterating next() if the message is not written and flushed would bump the aggregate size beyond
        // the expected resultIterationBatchSize. Total serialization time for the response remains in
        // effect so if the client is "slow" it may simply timeout.
        if (aggregate.size() < resultIterationBatchSize)
            aggregate.add(itty.next());

        // send back a page of results if batch size is met or if it's the end of the results being iterated.
        // also check writeability of the channel to prevent OOME for slow clients.
        if (ctx.channel().isWritable()) {
            if (aggregate.size() == resultIterationBatchSize || !itty.hasNext()) {
                final ResponseStatusCode code = itty.hasNext() ? ResponseStatusCode.PARTIAL_CONTENT
                        : ResponseStatusCode.SUCCESS;

                // serialize here because in sessionless requests the serialization must occur in the same
                // thread as the eval. as eval occurs in the GremlinExecutor there's no way to get back to the
                // thread that processed the eval of the script, so we have to push serialization down into that
                Frame frame;
                try {
                    frame = makeFrame(ctx, msg, serializer, useBinary, aggregate, code);
                } catch (Exception ex) {
                    // exception is handled in makeFrame() - serialization error gets written back to driver
                    // at that point
                    if (manageTransactions)
                        attemptRollback(msg, context.getGraphManager(), settings.strictTransactionManagement);
                    break;
                }

                // only need to reset the aggregation list if there's more stuff to write
                if (itty.hasNext())
                    aggregate = new ArrayList<>(resultIterationBatchSize);
                else {
                    // iteration and serialization are both complete which means this finished successfully. note that
                    // errors internal to script eval or timeout will rollback given GremlinServer's global configurations.
                    // local errors will get rolled back below because the exceptions aren't thrown in those cases to be
                    // caught by the GremlinExecutor for global rollback logic. this only needs to be committed if
                    // there are no more items to iterate and serialization is complete
                    if (managedTransactionsForRequest)
                        attemptCommit(msg, context.getGraphManager(), settings.strictTransactionManagement);

                    // exit the result iteration loop as there are no more results left. using this external control
                    // because of the above commit. some graphs may open a new transaction on the call to
                    // hasNext()
                    hasMore = false;
                }

                // the flush is called after the commit has potentially occurred. in this way, if a commit was
                // required then it will be 100% complete before the client receives it. the "frame" at this point
                // should have completely detached objects from the transaction (i.e. serialization has occurred)
                // so a new one should not be opened on the flush down the netty pipeline
                ctx.writeAndFlush(frame);
            }
        } else {
            // don't keep triggering this warning over and over again for the same request
            if (!warnOnce) {
                logger.warn("Pausing response writing as writeBufferHighWaterMark exceeded on {} - writing will continue once client has caught up", msg);
                warnOnce = true;
            }

            // since the client is lagging we can hold here for a period of time for the client to catch up.
            // this isn't blocking the IO thread - just a worker.
            TimeUnit.MILLISECONDS.sleep(10);
        }

        stopWatch.split();
        if (stopWatch.getSplitTime() > settings.serializedResponseTimeout) {
            final String timeoutMsg = String.format(
                    "Serialization of the entire response exceeded the 'serializedResponseTimeout' setting %s",
                    warnOnce ? "[Gremlin Server paused writes to client as messages were not being consumed quickly enough]"
                            : "");
            throw new TimeoutException(timeoutMsg.trim());
        }
        stopWatch.unsplit();
    }

    stopWatch.stop();
}
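The serializedResponseTimeout check above amounts to a time budget enforced inside the write loop: measure elapsed time each iteration and abort with a TimeoutException once the budget is spent. A distilled, JDK-only sketch of that shape (class and method names are illustrative, not TinkerPop's API):

import java.util.Iterator;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class TimeBudget {
    // Drains an iterator, aborting with TimeoutException once the overall budget is spent.
    public static void drainWithinBudget(Iterator<?> items, long budgetMillis) throws TimeoutException {
        long start = System.nanoTime();
        while (items.hasNext()) {
            Object item = items.next();
            // ... serialize/write 'item' here ...
            long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
            if (elapsed > budgetMillis) {
                throw new TimeoutException("Processing exceeded the time budget of " + budgetMillis + " ms");
            }
        }
    }
}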
From source file:com.emc.vipr.services.s3.FileAccessTest.java
/**
 * Waits until the target access mode is completely transitioned on the specified bucket.
 *
 * @param bucketName bucket name
 * @param targetMode target access mode to wait for (readOnly, readWrite, or disabled). Can be null if the target
 *                   mode is unknown (if you're disabling a portion of the bucket and don't know if there
 *                   are still exported objects)
 * @param timeout after the specified number of seconds, this method will throw a TimeoutException
 * @throws InterruptedException if interrupted while sleeping between GET intervals
 * @throws TimeoutException if the specified timeout is reached before the transition is complete
 */
protected void waitForTransition(String bucketName, ViPRConstants.FileAccessMode targetMode, int timeout)
        throws InterruptedException, TimeoutException {
    if (targetMode != null && targetMode.isTransitionState())
        throw new IllegalArgumentException("Invalid target mode: " + targetMode);
    long start = System.currentTimeMillis(), interval = 500;
    timeout *= 1000;
    while (true) {
        // GET the current access mode
        BucketFileAccessModeResult result = viprS3.getBucketFileAccessMode(bucketName);
        if (targetMode == null) {
            if (!result.getAccessMode().isTransitionState()) {
                return; // must be complete since the bucket is not in a transition state
            }
        } else {
            if (targetMode == result.getAccessMode()) {
                return; // transition is complete
            }
            if (!result.getAccessMode().isTransitionState()
                    || !result.getAccessMode().transitionsToTarget(targetMode))
                throw new RuntimeException(String.format("Bucket %s in mode %s will never get to mode %s",
                        bucketName, result.getAccessMode(), targetMode));
        }

        // if we've reached our timeout
        long runTime = System.currentTimeMillis() - start;
        if (runTime >= timeout)
            throw new TimeoutException(String.format(
                    "Access mode transition for %s took longer than %d seconds", bucketName, timeout / 1000));

        // transitioning; wait and query again
        long timeLeft = timeout - runTime;
        Thread.sleep(Math.min(timeLeft, interval));
    }
}
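Deadline-based polling like the method above follows a common shape: record a start time, re-query in a loop, sleep between attempts (never past the deadline), and throw TimeoutException once the deadline passes. A generic sketch of that shape (the BooleanSupplier-based helper is an assumption for illustration, not ViPR's API):

import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;

public final class Polling {
    // Re-evaluates the condition until it holds, or throws TimeoutException at the deadline.
    public static void pollUntil(BooleanSupplier condition, long timeoutMillis, long intervalMillis)
            throws InterruptedException, TimeoutException {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (!condition.getAsBoolean()) {
            long remaining = deadline - System.currentTimeMillis();
            if (remaining <= 0) {
                throw new TimeoutException("Condition not met within " + timeoutMillis + " ms");
            }
            // never sleep past the deadline
            Thread.sleep(Math.min(remaining, intervalMillis));
        }
    }
}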
From source file:com.linkedin.databus2.core.container.netty.ServerContainer.java
public void awaitShutdown(long timeoutMs) throws TimeoutException, InterruptedException {
    long startTs = System.currentTimeMillis();
    long endTs = startTs + timeoutMs;

    _controlLock.lock();
    try {
        long waitTime;
        while (!_shutdownRequest && (waitTime = endTs - System.currentTimeMillis()) > 0) {
            LOG.info("waiting for shutdown request for container id: " + _containerStaticConfig.getId());
            if (!_shutdownCondition.await(waitTime, TimeUnit.MILLISECONDS))
                break;
        }
    } finally {
        _controlLock.unlock();
    }
    if (!_shutdownRequest) {
        LOG.error("timeout waiting for a shutdown request");
        throw new TimeoutException("timeout waiting for shutdown request");
    }

    _controlLock.lock();
    try {
        long waitTime;
        while (!_shutdown && (waitTime = endTs - System.currentTimeMillis()) > 0) {
            LOG.info("Waiting for shutdown complete for serving container: " + _containerStaticConfig.getId());
            if (!_shutdownFinishedCondition.await(waitTime, TimeUnit.MILLISECONDS))
                break;
        }
    } finally {
        _controlLock.unlock();
    }
    if (!_shutdown) {
        LOG.error("timeout waiting for shutdown");
        throw new TimeoutException("timeout waiting for shutdown to complete");
    }
}
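awaitShutdown above uses the standard Lock/Condition deadline idiom: compute an absolute end time once, then loop on the condition with the remaining wait, so spurious wakeups cannot extend the total wait. A minimal self-contained sketch, assuming a single "done" flag guarded by the lock (class and member names are illustrative):

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

public final class ShutdownGate {
    private final ReentrantLock lock = new ReentrantLock();
    private final Condition doneCondition = lock.newCondition();
    private boolean done = false;

    public void markDone() {
        lock.lock();
        try {
            done = true;
            doneCondition.signalAll();
        } finally {
            lock.unlock();
        }
    }

    public void awaitDone(long timeoutMs) throws InterruptedException, TimeoutException {
        long endTs = System.currentTimeMillis() + timeoutMs;
        lock.lock();
        try {
            long waitTime;
            // re-check the flag after every wakeup; recompute the remaining wait each pass
            while (!done && (waitTime = endTs - System.currentTimeMillis()) > 0) {
                doneCondition.await(waitTime, TimeUnit.MILLISECONDS);
            }
            if (!done) {
                throw new TimeoutException("timeout waiting for shutdown to complete");
            }
        } finally {
            lock.unlock();
        }
    }
}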
From source file:org.cloudifysource.esc.driver.provisioning.ElasticMachineProvisioningCloudifyAdapter.java
@Override
public GridServiceAgent startMachine(final ExactZonesConfig zones, final GSAReservationId reservationId,
        final long duration, final TimeUnit unit) throws ElasticMachineProvisioningException,
        ElasticGridServiceAgentProvisioningException, InterruptedException, TimeoutException {

    logger.info("Cloudify Adapter is starting a new machine with zones " + zones.getZones()
            + " and reservation id " + reservationId);

    // calculate timeout
    final long end = System.currentTimeMillis() + unit.toMillis(duration);

    // provision the machine
    logger.info("Calling provisioning implementation for new machine");
    MachineDetails machineDetails;
    cloudifyProvisioning.setAdmin(getGlobalAdminInstance(originalESMAdmin));

    final ZonesConfig defaultZones = config.getGridServiceAgentZones();
    logger.fine("default zones = " + defaultZones.getZones());
    if (!defaultZones.isSatisfiedBy(zones)) {
        throw new IllegalArgumentException(
                "The specified zones " + zones + " does not satisfy the configuration zones " + defaultZones);
    }

    String locationId = null;

    logger.fine("searching for cloud specific zone");
    for (final String zone : zones.getZones()) {
        logger.fine("current zone = " + zone);
        if (zone.startsWith(CLOUD_ZONE_PREFIX)) {
            logger.fine("found a zone with " + CLOUD_ZONE_PREFIX + " prefix : " + zone);
            if (locationId == null) {
                locationId = zone.substring(CLOUD_ZONE_PREFIX.length());
                logger.fine("passing locationId to machine provisioning as " + locationId);
            } else {
                throw new IllegalArgumentException("The specified zones " + zones
                        + " should include only one zone with the " + CLOUD_ZONE_PREFIX + " prefix:" + locationId);
            }
        }
    }

    final MachineStartRequestedCloudifyEvent machineStartEvent = new MachineStartRequestedCloudifyEvent();
    machineStartEvent.setTemplateName(cloudTemplateName);
    machineStartEvent.setLocationId(locationId);
    machineEventListener.elasticMachineProvisioningProgressChanged(machineStartEvent);

    try {
        final ComputeTemplate template = cloud.getCloudCompute().getTemplates().get(this.cloudTemplateName);
        if (locationId == null) {
            locationId = template.getLocationId();
        }

        // This is the call to the actual cloud driver implementation!
        machineDetails = provisionMachine(locationId, reservationId, duration, unit);

        // This is to protect against a bug in the Admin. see CLOUDIFY-1592
        // (https://cloudifysource.atlassian.net/browse/CLOUDIFY-1592)
        if (!machineDetails.isAgentRunning()) {
            validateMachineIp(machineDetails);
        }

        // Auto populate installer configuration with values set in template
        // if they were not previously set.
        if (machineDetails != null && machineDetails.getInstallerConfiguration() == null) {
            machineDetails.setInstallerConfigutation(template.getInstaller());
        }
    } catch (final Exception e) {
        throw new ElasticMachineProvisioningException("Failed to provision machine: " + e.getMessage(), e);
    }

    logger.info("Machine was provisioned by implementation. Machine is: " + machineDetails);

    // which IP should be used in the cluster
    String machineIp;
    if (cloud.getConfiguration().isConnectToPrivateIp()) {
        machineIp = machineDetails.getPrivateAddress();
    } else {
        machineIp = machineDetails.getPublicAddress();
    }
    if (machineIp == null) {
        throw new IllegalStateException(
                "The IP of the new machine is null! Machine Details are: " + machineDetails + " .");
    }

    final MachineStartedCloudifyEvent machineStartedEvent = new MachineStartedCloudifyEvent();
    machineStartedEvent.setMachineDetails(machineDetails);
    machineStartedEvent.setHostAddress(machineIp);
    machineEventListener.elasticMachineProvisioningProgressChanged(machineStartedEvent);

    final GridServiceAgentStartRequestedEvent agentStartEvent = new GridServiceAgentStartRequestedEvent();
    agentStartEvent.setHostAddress(machineIp);
    agentEventListener.elasticGridServiceAgentProvisioningProgressChanged(agentStartEvent);

    final String volumeId = null;
    try {
        // check for timeout
        checkForProvisioningTimeout(end, machineDetails);

        if (machineDetails.isAgentRunning()) {
            logger.info("Machine provisioning provided a machine and indicated that an agent is already running");
        } else {
            // install gigaspaces and start agent
            logger.info("Cloudify Adapter is installing Cloudify agent with reservation id " + reservationId
                    + " on " + machineIp);
            installAndStartAgent(machineDetails, reservationId, end);
            // check for timeout again - the installation step can also take
            // a while to complete.
            checkForProvisioningTimeout(end, machineDetails);
        }

        // wait for GSA to become available
        logger.info("Cloudify adapter is waiting for GSA on host: " + machineIp + " with reservation id: "
                + reservationId + " to become available");
        final GridServiceAgent gsa = waitForGsa(machineIp, end, reservationId);
        if (gsa == null) {
            // GSA did not start correctly or on time - shutdown the machine
            throw new TimeoutException("New machine was provisioned and Cloudify was installed, "
                    + "but a GSA was not discovered on the new machine: " + machineDetails);
        }

        // TODO: Derive cloudify specific event and include more event details as specified in CLOUDIFY-10651
        agentEventListener.elasticGridServiceAgentProvisioningProgressChanged(
                new GridServiceAgentStartedEvent(machineIp, gsa.getUid()));

        // check that the agent is really started with the expected env variable of the template
        // we inject this variable earlier on to the bootstrap-management.sh script
        if (gsa.getVirtualMachine().getDetails().getEnvironmentVariables()
                .get(CloudifyConstants.GIGASPACES_CLOUD_TEMPLATE_NAME) == null) {
            throw new ElasticGridServiceAgentProvisioningException("an agent was started, but the property "
                    + CloudifyConstants.GIGASPACES_CLOUD_TEMPLATE_NAME
                    + " was missing from its environment variables.");
        }

        return gsa;
    } catch (final ElasticMachineProvisioningException e) {
        logger.info("ElasticMachineProvisioningException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final ElasticGridServiceAgentProvisioningException e) {
        logger.info("ElasticGridServiceAgentProvisioningException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final TimeoutException e) {
        logger.info("TimeoutException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final InterruptedException e) {
        logger.info("InterruptedException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final Throwable e) {
        logger.info("Unexpected exception occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw new IllegalStateException("Unexpected exception during machine provisioning", e);
    }
}
From source file:org.cloudifysource.esc.driver.provisioning.byon.ByonProvisioningDriver.java
private void stopAgentAndWait(final int expectedGsmCount, final String ipAddress)
        throws TimeoutException, InterruptedException {

    if (admin == null) {
        final Integer discoveryPort = getLusPort();
        admin = Utils.getAdminObject(ipAddress, expectedGsmCount, discoveryPort);
    }

    final Map<String, GridServiceAgent> agentsMap = admin.getGridServiceAgents().getHostAddress();
    // GridServiceAgent agent = agentsMap.get(ipAddress);
    GSA agent = null;
    for (final Entry<String, GridServiceAgent> agentEntry : agentsMap.entrySet()) {
        if (IPUtils.isSameIpAddress(agentEntry.getKey(), ipAddress)
                || agentEntry.getKey().equalsIgnoreCase(ipAddress)) {
            agent = ((InternalGridServiceAgent) agentEntry.getValue()).getGSA();
        }
    }

    if (agent != null) {
        logger.info("ByonProvisioningDriver: shutting down agent on server: " + ipAddress);
        try {
            admin.close();
            agent.shutdown();
        } catch (final RemoteException e) {
            if (!NetworkExceptionHelper.isConnectOrCloseException(e)) {
                logger.log(Level.FINER, "Failed to shutdown GSA", e);
                throw new AdminException("Failed to shutdown GSA", e);
            }
        }

        final long end = System.currentTimeMillis()
                + TimeUnit.MINUTES.toMillis(AGENT_SHUTDOWN_TIMEOUT_IN_MINUTES);
        boolean agentUp = isAgentUp(agent);
        while (agentUp && System.currentTimeMillis() < end) {
            logger.fine("next check in " + THREAD_WAITING_IDLE_TIME_IN_SECS + " seconds");
            Thread.sleep(TimeUnit.SECONDS.toMillis(THREAD_WAITING_IDLE_TIME_IN_SECS));
            agentUp = isAgentUp(agent);
        }
        // time out only if the agent is still up when the deadline passes
        if (agentUp && System.currentTimeMillis() >= end) {
            throw new TimeoutException("Agent shutdown timed out (agent IP: " + ipAddress + ")");
        }
    }
}
From source file:com.vmware.photon.controller.common.xenon.ServiceHostUtils.java
public static <H extends ServiceHost> void deleteAllDocuments(H host, String referrer, long timeout,
        TimeUnit timeUnit) throws Throwable {
    QueryTask.Query selfLinkClause = new QueryTask.Query()
            .setTermPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK)
            .setTermMatchValue("/photon/*")
            .setTermMatchType(QueryTask.QueryTerm.MatchType.WILDCARD);

    QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
    querySpecification.query.addBooleanClause(selfLinkClause);
    QueryTask queryTask = QueryTask.create(querySpecification).setDirect(true);

    NodeGroupBroadcastResponse queryResponse = ServiceHostUtils.sendBroadcastQueryAndWait(host, referrer,
            queryTask);

    Set<String> documentLinks = QueryTaskUtils.getBroadcastQueryDocumentLinks(queryResponse);
    if (documentLinks == null || documentLinks.size() <= 0) {
        return;
    }

    CountDownLatch latch = new CountDownLatch(1);
    OperationJoin.JoinedCompletionHandler handler = new OperationJoin.JoinedCompletionHandler() {
        @Override
        public void handle(Map<Long, Operation> ops, Map<Long, Throwable> failures) {
            if (failures != null && !failures.isEmpty()) {
                for (Throwable e : failures.values()) {
                    logger.error("deleteAllDocuments failed", e);
                }
            }
            latch.countDown();
        }
    };

    Collection<Operation> deletes = new LinkedList<>();
    for (String documentLink : documentLinks) {
        Operation deleteOperation = Operation.createDelete(UriUtils.buildUri(host, documentLink))
                .setBody("{}")
                .setReferer(UriUtils.buildUri(host, referrer));
        deletes.add(deleteOperation);
    }

    OperationJoin join = OperationJoin.create(deletes);
    join.setCompletion(handler);
    join.sendWith(host);
    if (!latch.await(timeout, timeUnit)) {
        throw new TimeoutException(String.format(
                "Deletion of all documents timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, timeUnit));
    }
}
From source file:com.microsoft.azurebatch.jenkins.azurebatch.AzureBatchHelper.java
private void waitForPoolReady(String poolId)
        throws BatchErrorException, IOException, InterruptedException, TimeoutException {
    long startTime = System.currentTimeMillis();
    long elapsedTime = 0L;
    boolean poolSteady = false;

    // Wait max 15 minutes for pool to reach steady state
    final long maxPoolSteadyWaitTimeInMinutes = 15;

    Logger.log(listener, String.format("Waiting for pool %s steady...", poolId));
    while (elapsedTime < maxPoolSteadyWaitTimeInMinutes * 60 * 1000) {
        CloudPool pool = client.poolOperations().getPool(poolId);
        if (pool.allocationState() == AllocationState.STEADY) {
            poolSteady = true;
            break;
        }
        Thread.sleep(15 * 1000);
        elapsedTime = System.currentTimeMillis() - startTime;
    }

    if (!poolSteady) {
        throw new TimeoutException(String.format("Pool %s is not steady after %d minutes.", poolId,
                maxPoolSteadyWaitTimeInMinutes));
    } else {
        Logger.log(listener, "Pool %s is steady.", poolId);
    }
}
From source file:com.microsoft.azurebatch.jenkins.azurebatch.AzureBatchHelper.java
private void waitForAtLeastOneVmReady(String poolId)
        throws BatchErrorException, IOException, InterruptedException, TimeoutException {
    long startTime = System.currentTimeMillis();
    long elapsedTime = 0L;
    boolean vmReady = false;

    // Wait max 20 minutes for a VM to start up
    final long maxVmIdleWaitTimeInMinutes = 20;

    Logger.log(listener, String.format("Waiting for pool %s at least one VM ready...", poolId));
    while (elapsedTime < maxVmIdleWaitTimeInMinutes * 60 * 1000) {
        List<ComputeNode> nodeCollection = client.computeNodeOperations().listComputeNodes(poolId,
                new DetailLevel.Builder().withSelectClause("state")
                        .withFilterClause("state eq 'idle' or state eq 'running'").build());
        for (ComputeNode node : nodeCollection) {
            ComputeNodeState nodeState = node.state();
            if (nodeState == ComputeNodeState.IDLE || nodeState == ComputeNodeState.RUNNING) {
                vmReady = true;
                break;
            }
        }
        if (vmReady) {
            break;
        }

        long nextWaitTime = 15 * 1000 - (System.currentTimeMillis() - startTime - elapsedTime);
        if (nextWaitTime > 0) {
            Thread.sleep(nextWaitTime);
        }
        elapsedTime = System.currentTimeMillis() - startTime;
    }

    if (!vmReady) {
        throw new TimeoutException(
                String.format("Pool %s no VM is ready after %d minutes.", poolId, maxVmIdleWaitTimeInMinutes));
    } else {
        Logger.log(listener, "Pool %s at least one VM is ready.", poolId);
    }
}