List of usage examples for java.util.concurrent.TimeoutException
public TimeoutException(String message)
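Before the source-file examples, a minimal sketch of where this exception most commonly appears in the JDK itself: Future.get(long, TimeUnit) throws TimeoutException when a result is not ready in time. The task body and durations here are illustrative only.

import java.util.concurrent.*;

public class TimeoutExample {
    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        Future<String> future = executor.submit(() -> {
            Thread.sleep(5_000); // simulated slow work
            return "done";
        });
        try {
            // get(timeout, unit) throws TimeoutException if the result is not ready in time
            String result = future.get(1, TimeUnit.SECONDS);
            System.out.println(result);
        } catch (TimeoutException e) {
            future.cancel(true); // interrupt the slow task
            System.err.println("Task timed out");
        } finally {
            executor.shutdownNow();
        }
    }
}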
From source file:com.vmware.photon.controller.common.dcp.ServiceHostUtils.java
public static <H extends ServiceHost> void deleteAllDocuments(H host, String referrer, long timeout,
        TimeUnit timeUnit) throws Throwable {
    QueryTask.Query selfLinkClause = new QueryTask.Query()
            .setTermPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK)
            .setTermMatchValue("*")
            .setTermMatchType(QueryTask.QueryTerm.MatchType.WILDCARD);

    QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
    querySpecification.query.addBooleanClause(selfLinkClause);
    QueryTask queryTask = QueryTask.create(querySpecification).setDirect(true);

    NodeGroupBroadcastResponse queryResponse = ServiceHostUtils.sendBroadcastQueryAndWait(host, referrer,
            queryTask);

    Set<String> documentLinks = QueryTaskUtils.getBroadcastQueryResults(queryResponse);
    if (documentLinks == null || documentLinks.size() <= 0) {
        return;
    }

    CountDownLatch latch = new CountDownLatch(1);
    OperationJoin.JoinedCompletionHandler handler = new OperationJoin.JoinedCompletionHandler() {
        @Override
        public void handle(Map<Long, Operation> ops, Map<Long, Throwable> failures) {
            if (failures != null && !failures.isEmpty()) {
                for (Throwable e : failures.values()) {
                    logger.error("deleteAllDocuments failed", e);
                }
            }
            latch.countDown();
        }
    };

    Collection<Operation> deletes = new LinkedList<>();
    for (String documentLink : documentLinks) {
        Operation deleteOperation = Operation.createDelete(UriUtils.buildUri(host, documentLink))
                .setBody("{}")
                .setReferer(UriUtils.buildUri(host, referrer));
        deletes.add(deleteOperation);
    }

    OperationJoin join = OperationJoin.create(deletes);
    join.setCompletion(handler);
    join.sendWith(host);
    if (!latch.await(timeout, timeUnit)) {
        throw new TimeoutException(String.format(
                "Deletion of all documents timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, timeUnit));
    }
}
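The core timeout mechanism in the example above is reusable on its own: await a CountDownLatch with a deadline and convert a false return into a TimeoutException. A stripped-down sketch (class and method names are illustrative):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class LatchAwait {
    // Blocks until the latch reaches zero, or throws TimeoutException when the wait elapses first.
    public static void awaitOrTimeout(CountDownLatch latch, long timeout, TimeUnit timeUnit)
            throws InterruptedException, TimeoutException {
        // await() returns false if the waiting time elapsed before the count reached zero
        if (!latch.await(timeout, timeUnit)) {
            throw new TimeoutException(String.format(
                    "Operation timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, timeUnit));
        }
    }
}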
From source file:brooklyn.util.internal.ssh.sshj.SshjTool.java
/**
 * Executes the script in the background (`nohup ... &`), and then executes other ssh commands to poll for the
 * stdout, stderr and exit code of that original process (which will each have been written to separate files).
 *
 * The polling is a "long poll". That is, it executes a long-running ssh command to retrieve the stdout, etc.
 * If that long-poll command fails, then we just execute another one to pick up from where it left off.
 * This means we do not need to execute many ssh commands (which are expensive), but can still return promptly
 * when the command completes.
 *
 * Much of this was motivated by https://issues.apache.org/jira/browse/BROOKLYN-106, which is no longer
 * an issue. The retries (e.g. in the upload-script) are arguably overkill given that {@link #acquire(SshAction)}
 * will already retry. However, leaving this in place as it could prove useful when working with flakey
 * networks in the future.
 *
 * TODO There are (probably) issues with this method when using {@link ShellTool#PROP_RUN_AS_ROOT}.
 * I (Aled) saw the .pid file having an owner of root:root, and a failure message in stderr of:
 *     -bash: line 3: /tmp/brooklyn-20150113-161203056-XMEo-move_install_dir_from_user_to_.pid: Permission denied
 */
protected int execScriptAsyncAndPoll(final Map<String, ?> props, final List<String> commands,
        final Map<String, ?> env) {
    return new ToolAbstractAsyncExecScript(props) {
        private int maxConsecutiveSshFailures = 3;
        private Duration maxDelayBetweenPolls = Duration.seconds(20);
        private Duration pollTimeout = getOptionalVal(props, PROP_EXEC_ASYNC_POLLING_TIMEOUT, Duration.FIVE_MINUTES);
        private int iteration = 0;
        private int consecutiveSshFailures = 0;
        private int stdoutCount = 0;
        private int stderrCount = 0;
        private Stopwatch timer;

        public int run() {
            timer = Stopwatch.createStarted();
            final String scriptContents = toScript(props, commands, env);
            if (LOG.isTraceEnabled())
                LOG.trace("Running shell command at {} as async script: {}", host, scriptContents);

            // Upload script; try repeatedly because have seen timeout intermittently on vcloud-director (BROOKLYN-106 related).
            boolean uploadSuccess = Repeater
                    .create("async script upload on " + SshjTool.this.toString() + " (for " + getSummary() + ")")
                    .backoffTo(maxDelayBetweenPolls)
                    .limitIterationsTo(3)
                    .rethrowException()
                    .until(new Callable<Boolean>() {
                        @Override
                        public Boolean call() throws Exception {
                            iteration++;
                            if (LOG.isDebugEnabled()) {
                                String msg = "Uploading (iteration=" + iteration + ") for async script on "
                                        + SshjTool.this.toString() + " (for " + getSummary() + ")";
                                if (iteration == 1) {
                                    LOG.trace(msg);
                                } else {
                                    LOG.debug(msg);
                                }
                            }
                            copyToServer(ImmutableMap.of("permissions", "0700"), scriptContents.getBytes(), scriptPath);
                            return true;
                        }
                    })
                    .run();
            if (!uploadSuccess) {
                // Unexpected! Should have either returned true or have rethrown the exception; should never get false.
                String msg = "Unexpected state: repeated failure for async script upload on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")";
                LOG.warn(msg + "; rethrowing");
                throw new IllegalStateException(msg);
            }

            // Execute script asynchronously
            int execResult = asInt(acquire(new ShellAction(buildRunScriptCommand(), out, err, execTimeout)), -1);
            if (execResult != 0)
                return execResult;

            // Long polling to get the status
            try {
                final AtomicReference<Integer> result = new AtomicReference<Integer>();
                boolean success = Repeater
                        .create("async script long-poll on " + SshjTool.this.toString() + " (for " + getSummary() + ")")
                        .backoffTo(maxDelayBetweenPolls)
                        .limitTimeTo(execTimeout)
                        .until(new Callable<Boolean>() {
                            @Override
                            public Boolean call() throws Exception {
                                iteration++;
                                if (LOG.isDebugEnabled())
                                    LOG.debug("Doing long-poll (iteration=" + iteration + ") for async script to complete on "
                                            + SshjTool.this.toString() + " (for " + getSummary() + ")");
                                Integer exitstatus = longPoll();
                                result.set(exitstatus);
                                return exitstatus != null;
                            }
                        })
                        .run();
                if (!success) {
                    // Timed out
                    String msg = "Timeout for async script to complete on " + SshjTool.this.toString()
                            + " (" + getSummary() + ")";
                    LOG.warn(msg + "; rethrowing");
                    throw new TimeoutException(msg);
                }
                return result.get();
            } catch (Exception e) {
                LOG.debug("Problem polling for async script on " + SshjTool.this.toString() + " (for "
                        + getSummary() + "); rethrowing after deleting temporary files", e);
                throw Exceptions.propagate(e);
            } finally {
                // Delete the temporary files created (and the `tail -c` commands that might have been left behind by long-polls).
                // Using pollTimeout so doesn't wait forever, but waits for a reasonable (configurable) length of time.
                // TODO also execute this if the `buildRunScriptCommand` fails, as that might have left files behind?
                try {
                    int execDeleteResult = asInt(
                            acquire(new ShellAction(deleteTemporaryFilesCommand(), out, err, pollTimeout)), -1);
                    if (execDeleteResult != 0) {
                        LOG.debug("Problem deleting temporary files of async script on " + SshjTool.this.toString()
                                + " (for " + getSummary() + "): exit status " + execDeleteResult);
                    }
                } catch (Exception e) {
                    Exceptions.propagateIfFatal(e);
                    LOG.debug("Problem deleting temporary files of async script on " + SshjTool.this.toString()
                            + " (for " + getSummary() + "); continuing", e);
                }
            }
        }

        Integer longPoll() throws IOException {
            // Long-polling to get stdout, stderr + exit status of async task.
            // If our long-poll disconnects, we will just re-execute.
            // We wrap the stdout/stderr so that we can get the size count.
            // If we disconnect, we will pick up from that char of the stream.
            // TODO Additional stdout/stderr written by buildLongPollCommand() could interfere,
            //      causing us to miss some characters.
            Duration nextPollTimeout = Duration.min(pollTimeout,
                    Duration.millis(execTimeout.toMilliseconds() - timer.elapsed(TimeUnit.MILLISECONDS)));
            CountingOutputStream countingOut = (out == null) ? null : new CountingOutputStream(out);
            CountingOutputStream countingErr = (err == null) ? null : new CountingOutputStream(err);
            List<String> pollCommand = buildLongPollCommand(stdoutCount, stderrCount, nextPollTimeout);
            Duration sshJoinTimeout = nextPollTimeout.add(Duration.TEN_SECONDS);
            ShellAction action = new ShellAction(pollCommand, countingOut, countingErr, sshJoinTimeout);

            int longPollResult;
            try {
                longPollResult = asInt(acquire(action, 3, nextPollTimeout), -1);
            } catch (RuntimeTimeoutException e) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll timed out on " + SshjTool.this.toString() + " (for " + getSummary() + "): " + e);
                return null;
            }
            stdoutCount += (countingOut == null) ? 0 : countingOut.getCount();
            stderrCount += (countingErr == null) ? 0 : countingErr.getCount();

            if (longPollResult == 0) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll succeeded (exit status 0) on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                return longPollResult; // success
            } else if (longPollResult == -1) {
                // probably a connection failure; try again
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status -1; will retry on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                return null;
            } else if (longPollResult == 125) {
                // 125 is the special code for timeout in long-poll (see buildLongPollCommand).
                // However, there is a tiny chance that the underlying command might have returned that exact exit code!
                // Don't treat a timeout as a "consecutiveSshFailure".
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status " + longPollResult
                            + "; most likely timeout; retrieving actual status on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                return retrieveStatusCommand();
            } else {
                // want to double-check whether this is the exit-code from the async process, or
                // some unexpected failure in our long-poll command.
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll received exit status " + longPollResult
                            + "; retrieving actual status on " + SshjTool.this.toString()
                            + " (for " + getSummary() + ")");
                Integer result = retrieveStatusCommand();
                if (result != null) {
                    return result;
                }
            }

            consecutiveSshFailures++;
            if (consecutiveSshFailures > maxConsecutiveSshFailures) {
                LOG.warn("Aborting on " + consecutiveSshFailures
                        + " consecutive ssh connection errors (return -1) when polling for async script to complete on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")");
                return -1;
            } else {
                LOG.info("Retrying after ssh connection error when polling for async script to complete on "
                        + SshjTool.this.toString() + " (" + getSummary() + ")");
                return null;
            }
        }

        Integer retrieveStatusCommand() throws IOException {
            // want to double-check whether this is the exit-code from the async process, or
            // some unexpected failure in our long-poll command.
            ByteArrayOutputStream statusOut = new ByteArrayOutputStream();
            ByteArrayOutputStream statusErr = new ByteArrayOutputStream();
            int statusResult = asInt(
                    acquire(new ShellAction(buildRetrieveStatusCommand(), statusOut, statusErr, execTimeout)), -1);

            if (statusResult == 0) {
                // The status we retrieved really is valid; return it.
                // TODO How to ensure no additional output in stdout/stderr when parsing below?
                String statusOutStr = new String(statusOut.toByteArray()).trim();
                if (Strings.isEmpty(statusOutStr)) {
                    // suggests not yet completed; will retry with long-poll
                    if (LOG.isDebugEnabled())
                        LOG.debug("Long-poll retrieved status directly; command successful but no result available on "
                                + SshjTool.this.toString() + " (for " + getSummary() + ")");
                    return null;
                } else {
                    if (LOG.isDebugEnabled())
                        LOG.debug("Long-poll retrieved status directly; returning '" + statusOutStr + "' on "
                                + SshjTool.this.toString() + " (for " + getSummary() + ")");
                    int result = Integer.parseInt(statusOutStr);
                    return result;
                }
            } else if (statusResult == -1) {
                // probably a connection failure; try again with long-poll
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll retrieving status directly received exit status -1; will retry on "
                            + SshjTool.this.toString() + " (for " + getSummary() + ")");
                return null;
            } else {
                if (out != null) {
                    out.write(toUTF8ByteArray(
                            "retrieving status failed with exit code " + statusResult + " (stdout follow)"));
                    out.write(statusOut.toByteArray());
                }
                if (err != null) {
                    err.write(toUTF8ByteArray(
                            "retrieving status failed with exit code " + statusResult + " (stderr follow)"));
                    err.write(statusErr.toByteArray());
                }
                if (LOG.isDebugEnabled())
                    LOG.debug("Long-poll retrieving status failed; returning " + statusResult + " on "
                            + SshjTool.this.toString() + " (for " + getSummary() + ")");
                return statusResult;
            }
        }
    }.run();
}
From source file:org.apache.tinkerpop.gremlin.server.op.AbstractEvalOpProcessor.java
/**
 * Called by {@link #evalOpInternal} when iterating a result set. Implementers should respect the
 * {@link Settings#serializedResponseTimeout} configuration and break the serialization process if
 * it begins to take too long to do so, throwing a {@link java.util.concurrent.TimeoutException} in such
 * cases.
 *
 * @param context The Gremlin Server {@link Context} object containing settings, request message, etc.
 * @param itty The result to iterate
 * @throws TimeoutException if the time taken to serialize the entire result set exceeds the allowable time.
 */
protected void handleIterator(final Context context, final Iterator itty)
        throws TimeoutException, InterruptedException {
    final ChannelHandlerContext ctx = context.getChannelHandlerContext();
    final RequestMessage msg = context.getRequestMessage();
    final Settings settings = context.getSettings();
    final MessageSerializer serializer = ctx.channel().attr(StateKey.SERIALIZER).get();
    final boolean useBinary = ctx.channel().attr(StateKey.USE_BINARY).get();
    boolean warnOnce = false;

    // sessionless requests are always transaction managed, but in-session requests are configurable.
    final boolean managedTransactionsForRequest = manageTransactions ? true
            : (Boolean) msg.getArgs().getOrDefault(Tokens.ARGS_MANAGE_TRANSACTION, false);

    // we have an empty iterator - happens on stuff like: g.V().iterate()
    if (!itty.hasNext()) {
        // as there is nothing left to iterate, if we are transaction managed then we should execute a
        // commit here before we send back a NO_CONTENT which implies success
        if (managedTransactionsForRequest)
            attemptCommit(msg, context.getGraphManager(), settings.strictTransactionManagement);
        ctx.writeAndFlush(ResponseMessage.build(msg).code(ResponseStatusCode.NO_CONTENT).create());
        return;
    }

    // timer for the total serialization time
    final StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    // the batch size can be overridden by the request
    final int resultIterationBatchSize = (Integer) msg.optionalArgs(Tokens.ARGS_BATCH_SIZE)
            .orElse(settings.resultIterationBatchSize);
    List<Object> aggregate = new ArrayList<>(resultIterationBatchSize);

    // use an external control to manage the loop as opposed to just checking hasNext() in the while. this
    // prevents situations where auto transactions create a new transaction after calls to commit() within
    // the loop on calls to hasNext().
    boolean hasMore = itty.hasNext();

    while (hasMore) {
        if (Thread.interrupted())
            throw new InterruptedException();

        // have to check the aggregate size because it is possible that the channel is not writeable (below)
        // so iterating next() if the message is not written and flushed would bump the aggregate size beyond
        // the expected resultIterationBatchSize. Total serialization time for the response remains in
        // effect so if the client is "slow" it may simply timeout.
        if (aggregate.size() < resultIterationBatchSize)
            aggregate.add(itty.next());

        // send back a page of results if batch size is met or if it's the end of the results being iterated.
        // also check writeability of the channel to prevent OOME for slow clients.
        if (ctx.channel().isWritable()) {
            if (aggregate.size() == resultIterationBatchSize || !itty.hasNext()) {
                final ResponseStatusCode code = itty.hasNext() ? ResponseStatusCode.PARTIAL_CONTENT
                        : ResponseStatusCode.SUCCESS;

                // serialize here because in sessionless requests the serialization must occur in the same
                // thread as the eval. as eval occurs in the GremlinExecutor there's no way to get back to the
                // thread that processed the eval of the script, so we have to push serialization down into that
                Frame frame;
                try {
                    frame = makeFrame(ctx, msg, serializer, useBinary, aggregate, code);
                } catch (Exception ex) {
                    // exception is handled in makeFrame() - serialization error gets written back to driver
                    // at that point
                    if (manageTransactions)
                        attemptRollback(msg, context.getGraphManager(), settings.strictTransactionManagement);
                    break;
                }

                // only need to reset the aggregation list if there's more stuff to write
                if (itty.hasNext())
                    aggregate = new ArrayList<>(resultIterationBatchSize);
                else {
                    // iteration and serialization are both complete which means this finished successfully. note that
                    // errors internal to script eval or timeout will rollback given GremlinServer's global configurations.
                    // local errors will get rolled back below because the exceptions aren't thrown in those cases to be
                    // caught by the GremlinExecutor for global rollback logic. this only needs to be committed if
                    // there are no more items to iterate and serialization is complete
                    if (managedTransactionsForRequest)
                        attemptCommit(msg, context.getGraphManager(), settings.strictTransactionManagement);

                    // exit the result iteration loop as there are no more results left. using this external control
                    // because of the above commit. some graphs may open a new transaction on the call to
                    // hasNext()
                    hasMore = false;
                }

                // the flush is called after the commit has potentially occurred. in this way, if a commit was
                // required then it will be 100% complete before the client receives it. the "frame" at this point
                // should have completely detached objects from the transaction (i.e. serialization has occurred)
                // so a new one should not be opened on the flush down the netty pipeline
                ctx.writeAndFlush(frame);
            }
        } else {
            // don't keep triggering this warning over and over again for the same request
            if (!warnOnce) {
                logger.warn("Pausing response writing as writeBufferHighWaterMark exceeded on {} - writing will continue once client has caught up", msg);
                warnOnce = true;
            }

            // since the client is lagging we can hold here for a period of time for the client to catch up.
            // this isn't blocking the IO thread - just a worker.
            TimeUnit.MILLISECONDS.sleep(10);
        }

        stopWatch.split();
        if (stopWatch.getSplitTime() > settings.serializedResponseTimeout) {
            final String timeoutMsg = String.format(
                    "Serialization of the entire response exceeded the 'serializedResponseTimeout' setting %s",
                    warnOnce ? "[Gremlin Server paused writes to client as messages were not being consumed quickly enough]"
                            : "");
            throw new TimeoutException(timeoutMsg.trim());
        }
        stopWatch.unsplit();
    }

    stopWatch.stop();
}
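The serializedResponseTimeout check above amounts to a time budget enforced inside the write loop: measure elapsed time each iteration and abort with a TimeoutException once the budget is spent. A distilled, JDK-only sketch of that shape (class and method names are illustrative, not TinkerPop's API):

import java.util.Iterator;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public final class TimeBudget {
    // Drains an iterator, aborting with TimeoutException once the overall budget is spent.
    public static void drainWithinBudget(Iterator<?> items, long budgetMillis) throws TimeoutException {
        long start = System.nanoTime();
        while (items.hasNext()) {
            Object item = items.next();
            // ... serialize/write 'item' here ...
            long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
            if (elapsed > budgetMillis) {
                throw new TimeoutException("Processing exceeded the time budget of " + budgetMillis + " ms");
            }
        }
    }
}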
From source file:com.emc.vipr.services.s3.FileAccessTest.java
/**
 * Waits until the target access mode is completely transitioned on the specified bucket.
 *
 * @param bucketName bucket name
 * @param targetMode target access mode to wait for (readOnly, readWrite, or disabled). Can be null if the target
 *                   mode is unknown (if you're disabling a portion of the bucket and don't know if there
 *                   are still exported objects)
 * @param timeout after the specified number of seconds, this method will throw a TimeoutException
 * @throws InterruptedException if interrupted while sleeping between GET intervals
 * @throws TimeoutException if the specified timeout is reached before the transition is complete
 */
protected void waitForTransition(String bucketName, ViPRConstants.FileAccessMode targetMode, int timeout)
        throws InterruptedException, TimeoutException {
    if (targetMode != null && targetMode.isTransitionState())
        throw new IllegalArgumentException("Invalid target mode: " + targetMode);
    long start = System.currentTimeMillis(), interval = 500;
    timeout *= 1000;
    while (true) {
        // GET the current access mode
        BucketFileAccessModeResult result = viprS3.getBucketFileAccessMode(bucketName);
        if (targetMode == null) {
            if (!result.getAccessMode().isTransitionState()) {
                return; // must be complete since the bucket is not in a transition state
            }
        } else {
            if (targetMode == result.getAccessMode()) {
                return; // transition is complete
            }
            if (!result.getAccessMode().isTransitionState()
                    || !result.getAccessMode().transitionsToTarget(targetMode))
                throw new RuntimeException(String.format("Bucket %s in mode %s will never get to mode %s",
                        bucketName, result.getAccessMode(), targetMode));
        }

        // if we've reached our timeout
        long runTime = System.currentTimeMillis() - start;
        if (runTime >= timeout)
            throw new TimeoutException(String.format(
                    "Access mode transition for %s took longer than %d seconds", bucketName, timeout / 1000));

        // transitioning; wait and query again
        long timeLeft = timeout - runTime;
        Thread.sleep(Math.min(timeLeft, interval));
    }
}
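Deadline-based polling like the method above follows a common shape: record a start time, re-query in a loop, sleep between attempts (never past the deadline), and throw TimeoutException once the deadline passes. A generic sketch of that shape (the BooleanSupplier-based helper is an assumption for illustration, not ViPR's API):

import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;

public final class Polling {
    // Re-evaluates the condition until it holds, or throws TimeoutException at the deadline.
    public static void pollUntil(BooleanSupplier condition, long timeoutMillis, long intervalMillis)
            throws InterruptedException, TimeoutException {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (!condition.getAsBoolean()) {
            long remaining = deadline - System.currentTimeMillis();
            if (remaining <= 0) {
                throw new TimeoutException("Condition not met within " + timeoutMillis + " ms");
            }
            // never sleep past the deadline
            Thread.sleep(Math.min(remaining, intervalMillis));
        }
    }
}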
From source file:com.linkedin.databus2.core.container.netty.ServerContainer.java
public void awaitShutdown(long timeoutMs) throws TimeoutException, InterruptedException {
    long startTs = System.currentTimeMillis();
    long endTs = startTs + timeoutMs;

    _controlLock.lock();
    try {
        long waitTime;
        while (!_shutdownRequest && (waitTime = endTs - System.currentTimeMillis()) > 0) {
            LOG.info("waiting for shutdown request for container id: " + _containerStaticConfig.getId());
            if (!_shutdownCondition.await(waitTime, TimeUnit.MILLISECONDS))
                break;
        }
    } finally {
        _controlLock.unlock();
    }
    if (!_shutdownRequest) {
        LOG.error("timeout waiting for a shutdown request");
        throw new TimeoutException("timeout waiting for shutdown request");
    }

    _controlLock.lock();
    try {
        long waitTime;
        while (!_shutdown && (waitTime = endTs - System.currentTimeMillis()) > 0) {
            LOG.info("Waiting for shutdown complete for serving container: " + _containerStaticConfig.getId());
            if (!_shutdownFinishedCondition.await(waitTime, TimeUnit.MILLISECONDS))
                break;
        }
    } finally {
        _controlLock.unlock();
    }
    if (!_shutdown) {
        LOG.error("timeout waiting for shutdown");
        throw new TimeoutException("timeout waiting for shutdown to complete");
    }
}
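awaitShutdown above uses the standard Lock/Condition deadline idiom: compute an absolute end time once, then loop on the condition with the remaining wait, so spurious wakeups cannot extend the total wait. A minimal self-contained sketch, assuming a single "done" flag guarded by the lock (class and member names are illustrative):

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

public final class ShutdownGate {
    private final ReentrantLock lock = new ReentrantLock();
    private final Condition doneCondition = lock.newCondition();
    private boolean done = false;

    public void markDone() {
        lock.lock();
        try {
            done = true;
            doneCondition.signalAll();
        } finally {
            lock.unlock();
        }
    }

    public void awaitDone(long timeoutMs) throws InterruptedException, TimeoutException {
        long endTs = System.currentTimeMillis() + timeoutMs;
        lock.lock();
        try {
            long waitTime;
            // re-check the flag after every wakeup; recompute the remaining wait each pass
            while (!done && (waitTime = endTs - System.currentTimeMillis()) > 0) {
                doneCondition.await(waitTime, TimeUnit.MILLISECONDS);
            }
            if (!done) {
                throw new TimeoutException("timeout waiting for shutdown to complete");
            }
        } finally {
            lock.unlock();
        }
    }
}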
From source file:org.cloudifysource.esc.driver.provisioning.ElasticMachineProvisioningCloudifyAdapter.java
@Override
public GridServiceAgent startMachine(final ExactZonesConfig zones, final GSAReservationId reservationId,
        final long duration, final TimeUnit unit) throws ElasticMachineProvisioningException,
        ElasticGridServiceAgentProvisioningException, InterruptedException, TimeoutException {

    logger.info("Cloudify Adapter is starting a new machine with zones " + zones.getZones()
            + " and reservation id " + reservationId);

    // calculate timeout
    final long end = System.currentTimeMillis() + unit.toMillis(duration);

    // provision the machine
    logger.info("Calling provisioning implementation for new machine");
    MachineDetails machineDetails;
    cloudifyProvisioning.setAdmin(getGlobalAdminInstance(originalESMAdmin));

    final ZonesConfig defaultZones = config.getGridServiceAgentZones();
    logger.fine("default zones = " + defaultZones.getZones());
    if (!defaultZones.isSatisfiedBy(zones)) {
        throw new IllegalArgumentException(
                "The specified zones " + zones + " does not satisfy the configuration zones " + defaultZones);
    }

    String locationId = null;

    logger.fine("searching for cloud specific zone");
    for (final String zone : zones.getZones()) {
        logger.fine("current zone = " + zone);
        if (zone.startsWith(CLOUD_ZONE_PREFIX)) {
            logger.fine("found a zone with " + CLOUD_ZONE_PREFIX + " prefix : " + zone);
            if (locationId == null) {
                locationId = zone.substring(CLOUD_ZONE_PREFIX.length());
                logger.fine("passing locationId to machine provisioning as " + locationId);
            } else {
                throw new IllegalArgumentException("The specified zones " + zones
                        + " should include only one zone with the " + CLOUD_ZONE_PREFIX + " prefix:" + locationId);
            }
        }
    }

    final MachineStartRequestedCloudifyEvent machineStartEvent = new MachineStartRequestedCloudifyEvent();
    machineStartEvent.setTemplateName(cloudTemplateName);
    machineStartEvent.setLocationId(locationId);
    machineEventListener.elasticMachineProvisioningProgressChanged(machineStartEvent);

    try {
        final ComputeTemplate template = cloud.getCloudCompute().getTemplates().get(this.cloudTemplateName);
        if (locationId == null) {
            locationId = template.getLocationId();
        }

        // This is the call to the actual cloud driver implementation!
        machineDetails = provisionMachine(locationId, reservationId, duration, unit);

        // This is to protect against a bug in the Admin. see CLOUDIFY-1592
        // (https://cloudifysource.atlassian.net/browse/CLOUDIFY-1592)
        if (!machineDetails.isAgentRunning()) {
            validateMachineIp(machineDetails);
        }

        // Auto populate installer configuration with values set in template
        // if they were not previously set.
        if (machineDetails != null && machineDetails.getInstallerConfiguration() == null) {
            machineDetails.setInstallerConfigutation(template.getInstaller());
        }
    } catch (final Exception e) {
        throw new ElasticMachineProvisioningException("Failed to provision machine: " + e.getMessage(), e);
    }

    logger.info("Machine was provisioned by implementation. Machine is: " + machineDetails);

    // which IP should be used in the cluster
    String machineIp;
    if (cloud.getConfiguration().isConnectToPrivateIp()) {
        machineIp = machineDetails.getPrivateAddress();
    } else {
        machineIp = machineDetails.getPublicAddress();
    }
    if (machineIp == null) {
        throw new IllegalStateException(
                "The IP of the new machine is null! Machine Details are: " + machineDetails + " .");
    }

    final MachineStartedCloudifyEvent machineStartedEvent = new MachineStartedCloudifyEvent();
    machineStartedEvent.setMachineDetails(machineDetails);
    machineStartedEvent.setHostAddress(machineIp);
    machineEventListener.elasticMachineProvisioningProgressChanged(machineStartedEvent);

    final GridServiceAgentStartRequestedEvent agentStartEvent = new GridServiceAgentStartRequestedEvent();
    agentStartEvent.setHostAddress(machineIp);
    agentEventListener.elasticGridServiceAgentProvisioningProgressChanged(agentStartEvent);

    final String volumeId = null;
    try {
        // check for timeout
        checkForProvisioningTimeout(end, machineDetails);

        if (machineDetails.isAgentRunning()) {
            logger.info("Machine provisioning provided a machine and indicated that an agent is already running");
        } else {
            // install gigaspaces and start agent
            logger.info("Cloudify Adapter is installing Cloudify agent with reservation id " + reservationId
                    + " on " + machineIp);
            installAndStartAgent(machineDetails, reservationId, end);
            // check for timeout again - the installation step can also take
            // a while to complete.
            checkForProvisioningTimeout(end, machineDetails);
        }

        // wait for GSA to become available
        logger.info("Cloudify adapter is waiting for GSA on host: " + machineIp + " with reservation id: "
                + reservationId + " to become available");
        final GridServiceAgent gsa = waitForGsa(machineIp, end, reservationId);
        if (gsa == null) {
            // GSA did not start correctly or on time - shutdown the machine
            throw new TimeoutException("New machine was provisioned and Cloudify was installed, "
                    + "but a GSA was not discovered on the new machine: " + machineDetails);
        }

        // TODO: Derive cloudify specific event and include more event details as specified in CLOUDIFY-10651
        agentEventListener.elasticGridServiceAgentProvisioningProgressChanged(
                new GridServiceAgentStartedEvent(machineIp, gsa.getUid()));

        // check that the agent is really started with the expected env variable of the template
        // we inject this variable earlier on to the bootstrap-management.sh script
        if (gsa.getVirtualMachine().getDetails().getEnvironmentVariables()
                .get(CloudifyConstants.GIGASPACES_CLOUD_TEMPLATE_NAME) == null) {
            throw new ElasticGridServiceAgentProvisioningException("an agent was started, but the property "
                    + CloudifyConstants.GIGASPACES_CLOUD_TEMPLATE_NAME
                    + " was missing from its environment variables.");
        }

        return gsa;
    } catch (final ElasticMachineProvisioningException e) {
        logger.info("ElasticMachineProvisioningException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final ElasticGridServiceAgentProvisioningException e) {
        logger.info("ElasticGridServiceAgentProvisioningException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final TimeoutException e) {
        logger.info("TimeoutException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final InterruptedException e) {
        logger.info("InterruptedException occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw e;
    } catch (final Throwable e) {
        logger.info("Unexpected exception occurred, " + e.getMessage());
        logger.info(ExceptionUtils.getFullStackTrace(e));
        handleExceptionAfterMachineCreated(machineIp, volumeId, machineDetails, end, reservationId);
        throw new IllegalStateException("Unexpected exception during machine provisioning", e);
    }
}
From source file:org.cloudifysource.esc.driver.provisioning.byon.ByonProvisioningDriver.java
private void stopAgentAndWait(final int expectedGsmCount, final String ipAddress)
        throws TimeoutException, InterruptedException {

    if (admin == null) {
        final Integer discoveryPort = getLusPort();
        admin = Utils.getAdminObject(ipAddress, expectedGsmCount, discoveryPort);
    }

    final Map<String, GridServiceAgent> agentsMap = admin.getGridServiceAgents().getHostAddress();
    // GridServiceAgent agent = agentsMap.get(ipAddress);
    GSA agent = null;
    for (final Entry<String, GridServiceAgent> agentEntry : agentsMap.entrySet()) {
        if (IPUtils.isSameIpAddress(agentEntry.getKey(), ipAddress)
                || agentEntry.getKey().equalsIgnoreCase(ipAddress)) {
            agent = ((InternalGridServiceAgent) agentEntry.getValue()).getGSA();
        }
    }

    if (agent != null) {
        logger.info("ByonProvisioningDriver: shutting down agent on server: " + ipAddress);
        try {
            admin.close();
            agent.shutdown();
        } catch (final RemoteException e) {
            if (!NetworkExceptionHelper.isConnectOrCloseException(e)) {
                logger.log(Level.FINER, "Failed to shutdown GSA", e);
                throw new AdminException("Failed to shutdown GSA", e);
            }
        }

        final long end = System.currentTimeMillis()
                + TimeUnit.MINUTES.toMillis(AGENT_SHUTDOWN_TIMEOUT_IN_MINUTES);
        boolean agentUp = isAgentUp(agent);
        while (agentUp && System.currentTimeMillis() < end) {
            logger.fine("next check in " + THREAD_WAITING_IDLE_TIME_IN_SECS + " seconds");
            Thread.sleep(TimeUnit.SECONDS.toMillis(THREAD_WAITING_IDLE_TIME_IN_SECS));
            agentUp = isAgentUp(agent);
        }
        // time out only if the agent is still up when the deadline passes
        if (agentUp && System.currentTimeMillis() >= end) {
            throw new TimeoutException("Agent shutdown timed out (agent IP: " + ipAddress + ")");
        }
    }
}
From source file:com.vmware.photon.controller.common.xenon.ServiceHostUtils.java
public static <H extends ServiceHost> void deleteAllDocuments(H host, String referrer, long timeout,
        TimeUnit timeUnit) throws Throwable {
    QueryTask.Query selfLinkClause = new QueryTask.Query()
            .setTermPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK)
            .setTermMatchValue("/photon/*")
            .setTermMatchType(QueryTask.QueryTerm.MatchType.WILDCARD);

    QueryTask.QuerySpecification querySpecification = new QueryTask.QuerySpecification();
    querySpecification.query.addBooleanClause(selfLinkClause);
    QueryTask queryTask = QueryTask.create(querySpecification).setDirect(true);

    NodeGroupBroadcastResponse queryResponse = ServiceHostUtils.sendBroadcastQueryAndWait(host, referrer,
            queryTask);

    Set<String> documentLinks = QueryTaskUtils.getBroadcastQueryDocumentLinks(queryResponse);
    if (documentLinks == null || documentLinks.size() <= 0) {
        return;
    }

    CountDownLatch latch = new CountDownLatch(1);
    OperationJoin.JoinedCompletionHandler handler = new OperationJoin.JoinedCompletionHandler() {
        @Override
        public void handle(Map<Long, Operation> ops, Map<Long, Throwable> failures) {
            if (failures != null && !failures.isEmpty()) {
                for (Throwable e : failures.values()) {
                    logger.error("deleteAllDocuments failed", e);
                }
            }
            latch.countDown();
        }
    };

    Collection<Operation> deletes = new LinkedList<>();
    for (String documentLink : documentLinks) {
        Operation deleteOperation = Operation.createDelete(UriUtils.buildUri(host, documentLink))
                .setBody("{}")
                .setReferer(UriUtils.buildUri(host, referrer));
        deletes.add(deleteOperation);
    }

    OperationJoin join = OperationJoin.create(deletes);
    join.setCompletion(handler);
    join.sendWith(host);
    if (!latch.await(timeout, timeUnit)) {
        throw new TimeoutException(String.format(
                "Deletion of all documents timed out. Timeout:{%s}, TimeUnit:{%s}", timeout, timeUnit));
    }
}
From source file:com.microsoft.azurebatch.jenkins.azurebatch.AzureBatchHelper.java
private void waitForPoolReady(String poolId)
        throws BatchErrorException, IOException, InterruptedException, TimeoutException {
    long startTime = System.currentTimeMillis();
    long elapsedTime = 0L;
    boolean poolSteady = false;

    // Wait max 15 minutes for pool to reach steady state
    final long maxPoolSteadyWaitTimeInMinutes = 15;

    Logger.log(listener, String.format("Waiting for pool %s steady...", poolId));
    while (elapsedTime < maxPoolSteadyWaitTimeInMinutes * 60 * 1000) {
        CloudPool pool = client.poolOperations().getPool(poolId);
        if (pool.allocationState() == AllocationState.STEADY) {
            poolSteady = true;
            break;
        }
        Thread.sleep(15 * 1000);
        elapsedTime = System.currentTimeMillis() - startTime;
    }

    if (!poolSteady) {
        throw new TimeoutException(String.format("Pool %s is not steady after %d minutes.", poolId,
                maxPoolSteadyWaitTimeInMinutes));
    } else {
        Logger.log(listener, "Pool %s is steady.", poolId);
    }
}
From source file:com.microsoft.azurebatch.jenkins.azurebatch.AzureBatchHelper.java
private void waitForAtLeastOneVmReady(String poolId)
        throws BatchErrorException, IOException, InterruptedException, TimeoutException {
    long startTime = System.currentTimeMillis();
    long elapsedTime = 0L;
    boolean vmReady = false;

    // Wait max 20 minutes for a VM to start up
    final long maxVmIdleWaitTimeInMinutes = 20;

    Logger.log(listener, String.format("Waiting for pool %s at least one VM ready...", poolId));
    while (elapsedTime < maxVmIdleWaitTimeInMinutes * 60 * 1000) {
        List<ComputeNode> nodeCollection = client.computeNodeOperations().listComputeNodes(poolId,
                new DetailLevel.Builder().withSelectClause("state")
                        .withFilterClause("state eq 'idle' or state eq 'running'").build());
        for (ComputeNode node : nodeCollection) {
            ComputeNodeState nodeState = node.state();
            if (nodeState == ComputeNodeState.IDLE || nodeState == ComputeNodeState.RUNNING) {
                vmReady = true;
                break;
            }
        }
        if (vmReady) {
            break;
        }

        long nextWaitTime = 15 * 1000 - (System.currentTimeMillis() - startTime - elapsedTime);
        if (nextWaitTime > 0) {
            Thread.sleep(nextWaitTime);
        }
        elapsedTime = System.currentTimeMillis() - startTime;
    }

    if (!vmReady) {
        throw new TimeoutException(
                String.format("Pool %s no VM is ready after %d minutes.", poolId, maxVmIdleWaitTimeInMinutes));
    } else {
        Logger.log(listener, "Pool %s at least one VM is ready.", poolId);
    }
}