List of usage examples for java.lang.Process.destroyForcibly()
public Process destroyForcibly()
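destroyForcibly() requests that the subprocess be killed and returns the Process object, so the call is usually chained with waitFor(...) to confirm termination. Below is a minimal, self-contained sketch (not taken from any of the projects listed here) of the pattern most of the following examples use: ask the process to exit with destroy(), then fall back to destroyForcibly() if it does not terminate within a timeout. The sleep command and the 5-second timeout are placeholders for illustration only.

    import java.util.concurrent.TimeUnit;

    public class DestroyForciblyExample {
      public static void main(String[] args) throws Exception {
        // Start a long-running child process (placeholder command).
        Process process = new ProcessBuilder("sleep", "60").start();

        // Ask the process to terminate gracefully first.
        process.destroy();

        // If it has not exited within the timeout, kill it forcibly
        // and wait for the forced termination to take effect.
        if (!process.waitFor(5, TimeUnit.SECONDS)) {
          process.destroyForcibly().waitFor();
        }
        System.out.println("Exit value: " + process.exitValue());
      }
    }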
From source file:org.sonar.application.process.ProcessLauncherImpl.java
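In SonarQube's process launcher, the freshly started child process is forcibly destroyed if creating its process monitor fails, before the failure is rethrown.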
public ProcessMonitor launch(AbstractCommand command) {
  EsInstallation fileSystem = command.getEsInstallation();
  if (fileSystem != null) {
    cleanupOutdatedEsData(fileSystem);
    writeConfFiles(fileSystem);
  }
  Process process;
  if (command instanceof EsScriptCommand) {
    process = launchExternal((EsScriptCommand) command);
  } else if (command instanceof JavaCommand) {
    process = launchJava((JavaCommand) command);
  } else {
    throw new IllegalStateException("Unexpected type of command: " + command.getClass());
  }

  ProcessId processId = command.getProcessId();
  try {
    if (processId == ProcessId.ELASTICSEARCH) {
      return new EsProcessMonitor(process, processId, command.getEsInstallation(), new EsConnectorImpl());
    } else {
      ProcessCommands commands = allProcessesCommands.createAfterClean(processId.getIpcIndex());
      return new ProcessCommandsProcessMonitor(process, processId, commands);
    }
  } catch (Exception e) {
    // just in case
    if (process != null) {
      process.destroyForcibly();
    }
    throw new IllegalStateException(format("Fail to launch monitor of process [%s]", processId.getKey()), e);
  }
}
From source file:org.codelibs.fess.helper.ProcessHelper.java
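Fess's job helper interrupts the stream-reader thread, closes the child's streams on separate threads, then calls destroyForcibly() and waits up to a configurable timeout before reading the exit value.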
protected int destroyProcess(final String sessionId, final JobProcess jobProcess) {
  if (jobProcess != null) {
    final InputStreamThread ist = jobProcess.getInputStreamThread();
    try {
      ist.interrupt();
    } catch (final Exception e) {
      logger.warn("Could not interrupt a thread of an input stream.", e);
    }
    final CountDownLatch latch = new CountDownLatch(3);
    final Process process = jobProcess.getProcess();
    new Thread(() -> {
      try {
        CloseableUtil.closeQuietly(process.getInputStream());
      } catch (final Exception e) {
        logger.warn("Could not close a process input stream.", e);
      } finally {
        latch.countDown();
      }
    }, "ProcessCloser-input-" + sessionId).start();
    new Thread(() -> {
      try {
        CloseableUtil.closeQuietly(process.getErrorStream());
      } catch (final Exception e) {
        logger.warn("Could not close a process error stream.", e);
      } finally {
        latch.countDown();
      }
    }, "ProcessCloser-error-" + sessionId).start();
    new Thread(() -> {
      try {
        CloseableUtil.closeQuietly(process.getOutputStream());
      } catch (final Exception e) {
        logger.warn("Could not close a process output stream.", e);
      } finally {
        latch.countDown();
      }
    }, "ProcessCloser-output-" + sessionId).start();
    try {
      latch.await(10, TimeUnit.SECONDS);
    } catch (final InterruptedException e) {
      logger.warn("Interrupted to wait a process.", e);
    }
    try {
      process.destroyForcibly().waitFor(processDestroyTimeout, TimeUnit.SECONDS);
      return process.exitValue();
    } catch (final Exception e) {
      logger.error("Could not destroy a process correctly.", e);
    }
  }
  return -1;
}
From source file:com.thinkbiganalytics.nifi.v2.spark.ExecuteSparkJob.java
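This Apache NiFi processor launches a Spark job as a child process via SparkLauncher and forcibly destroys the process if it does not complete within the configured timeout.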
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
  final ComponentLog logger = getLog();
  FlowFile flowFile = session.get();
  if (flowFile == null) {
    return;
  }
  String PROVENANCE_JOB_STATUS_KEY = "Job Status";
  String PROVENANCE_SPARK_EXIT_CODE_KEY = "Spark Exit Code";

  try {
    PROVENANCE_JOB_STATUS_KEY = context.getName() + " Job Status";
    PROVENANCE_SPARK_EXIT_CODE_KEY = context.getName() + " Spark Exit Code";

    /* Configuration parameters for spark launcher */
    String appJar = context.getProperty(APPLICATION_JAR).evaluateAttributeExpressions(flowFile).getValue().trim();
    String extraJars = context.getProperty(EXTRA_JARS).evaluateAttributeExpressions(flowFile).getValue();
    String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
    String mainClass = context.getProperty(MAIN_CLASS).evaluateAttributeExpressions(flowFile).getValue().trim();
    String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue().trim();
    String appArgs = context.getProperty(MAIN_ARGS).evaluateAttributeExpressions(flowFile).getValue().trim();
    String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
    String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
    String numberOfExecutors = context.getProperty(NUMBER_EXECUTORS).evaluateAttributeExpressions(flowFile).getValue();
    String sparkApplicationName = context.getProperty(SPARK_APPLICATION_NAME).evaluateAttributeExpressions(flowFile).getValue();
    String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile).getValue();
    String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile).getValue();
    String principal = context.getProperty(kerberosPrincipal).getValue();
    String keyTab = context.getProperty(kerberosKeyTab).getValue();
    String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
    String sparkConfs = context.getProperty(SPARK_CONFS).evaluateAttributeExpressions(flowFile).getValue();
    String extraFiles = context.getProperty(EXTRA_SPARK_FILES).evaluateAttributeExpressions(flowFile).getValue();
    Integer sparkProcessTimeout = context.getProperty(PROCESS_TIMEOUT)
        .evaluateAttributeExpressions(flowFile).asTimePeriod(TimeUnit.SECONDS).intValue();
    String datasourceIds = context.getProperty(DATASOURCES).evaluateAttributeExpressions(flowFile).getValue();
    MetadataProviderService metadataService = context.getProperty(METADATA_SERVICE)
        .asControllerService(MetadataProviderService.class);

    String[] confs = null;
    if (!StringUtils.isEmpty(sparkConfs)) {
      confs = sparkConfs.split("\\|");
    }
    String[] args = null;
    if (!StringUtils.isEmpty(appArgs)) {
      args = appArgs.split(",");
    }
    final List<String> extraJarPaths = new ArrayList<>();
    if (!StringUtils.isEmpty(extraJars)) {
      extraJarPaths.addAll(Arrays.asList(extraJars.split(",")));
    } else {
      getLog().info("No extra jars to be added to class path");
    }

    // If all 3 fields are filled out then assume kerberos is enabled, and user should be authenticated
    boolean authenticateUser = false;
    if (!StringUtils.isEmpty(principal) && !StringUtils.isEmpty(keyTab)
        && !StringUtils.isEmpty(hadoopConfigurationResources)) {
      authenticateUser = true;
    }

    if (authenticateUser) {
      ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
      Configuration configuration;
      try {
        getLog().info("Getting Hadoop configuration from " + hadoopConfigurationResources);
        configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);
        if (SecurityUtil.isSecurityEnabled(configuration)) {
          getLog().info("Security is enabled");
          if (principal.equals("") && keyTab.equals("")) {
            getLog().error("Kerberos Principal and Kerberos KeyTab information missing in Kerberos enabled cluster. {} ",
                new Object[] { flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
          }
          try {
            getLog().info("User authentication initiated");
            boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger,
                hadoopConfigurationResources, principal, keyTab);
            if (authenticationStatus) {
              getLog().info("User authenticated successfully.");
            } else {
              getLog().error("User authentication failed. {} ", new Object[] { flowFile });
              session.transfer(flowFile, REL_FAILURE);
              return;
            }
          } catch (Exception unknownException) {
            getLog().error("Unknown exception occurred while validating user : {}. {} ",
                new Object[] { unknownException.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
          }
        }
      } catch (IOException e1) {
        getLog().error("Unknown exception occurred while authenticating user : {} and flow file: {}",
            new Object[] { e1.getMessage(), flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
      }
    }

    String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();

    // Build environment
    final Map<String, String> env = new HashMap<>();
    if (StringUtils.isNotBlank(datasourceIds)) {
      final StringBuilder datasources = new StringBuilder(10240);
      final ObjectMapper objectMapper = new ObjectMapper();
      final MetadataProvider provider = metadataService.getProvider();
      for (final String id : datasourceIds.split(",")) {
        datasources.append((datasources.length() == 0) ? '[' : ',');
        final Optional<Datasource> datasource = provider.getDatasource(id);
        if (datasource.isPresent()) {
          if (datasource.get() instanceof JdbcDatasource
              && StringUtils.isNotBlank(((JdbcDatasource) datasource.get()).getDatabaseDriverLocation())) {
            final String[] databaseDriverLocations = ((JdbcDatasource) datasource.get())
                .getDatabaseDriverLocation().split(",");
            extraJarPaths.addAll(Arrays.asList(databaseDriverLocations));
          }
          datasources.append(objectMapper.writeValueAsString(datasource.get()));
        } else {
          logger.error("Required datasource {} is missing for Spark job: {}", new Object[] { id, flowFile });
          flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Invalid data source: " + id);
          session.transfer(flowFile, REL_FAILURE);
          return;
        }
      }
      datasources.append(']');
      env.put("DATASOURCES", datasources.toString());
    }

    /* Launch the spark job as a child process */
    SparkLauncher launcher = new SparkLauncher(env)
        .setAppResource(appJar)
        .setMainClass(mainClass)
        .setMaster(sparkMaster)
        .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
        .setConf(SPARK_NUM_EXECUTORS, numberOfExecutors)
        .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
        .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
        .setConf(SPARK_NETWORK_TIMEOUT_CONFIG_NAME, networkTimeout)
        .setSparkHome(sparkHome)
        .setAppName(sparkApplicationName);

    if (authenticateUser) {
      launcher.setConf(SPARK_YARN_KEYTAB, keyTab);
      launcher.setConf(SPARK_YARN_PRINCIPAL, principal);
    }
    if (args != null) {
      launcher.addAppArgs(args);
    }
    if (confs != null) {
      for (String conf : confs) {
        getLog().info("Adding sparkconf '" + conf + "'");
        launcher.addSparkArg(SPARK_CONFIG_NAME, conf);
      }
    }
    if (!extraJarPaths.isEmpty()) {
      for (String path : extraJarPaths) {
        getLog().info("Adding to class path '" + path + "'");
        launcher.addJar(path);
      }
    }
    if (StringUtils.isNotEmpty(yarnQueue)) {
      launcher.setConf(SPARK_YARN_QUEUE, yarnQueue);
    }
    if (StringUtils.isNotEmpty(extraFiles)) {
      launcher.addSparkArg(SPARK_EXTRA_FILES_CONFIG_NAME, extraFiles);
    }

    Process spark = launcher.launch();

    /* Read/clear the process input stream */
    InputStreamReaderRunnable inputStreamReaderRunnable =
        new InputStreamReaderRunnable(LogLevel.INFO, logger, spark.getInputStream());
    Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
    inputThread.start();

    /* Read/clear the process error stream */
    InputStreamReaderRunnable errorStreamReaderRunnable =
        new InputStreamReaderRunnable(LogLevel.INFO, logger, spark.getErrorStream());
    Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
    errorThread.start();

    logger.info("Waiting for Spark job to complete");

    /* Wait for job completion */
    boolean completed = spark.waitFor(sparkProcessTimeout, TimeUnit.SECONDS);
    if (!completed) {
      spark.destroyForcibly();
      getLog().error("Spark process timed out after {} seconds using flow file: {} ",
          new Object[] { sparkProcessTimeout, flowFile });
      session.transfer(flowFile, REL_FAILURE);
      return;
    }

    int exitCode = spark.exitValue();
    flowFile = session.putAttribute(flowFile, PROVENANCE_SPARK_EXIT_CODE_KEY, exitCode + "");
    if (exitCode != 0) {
      logger.error("ExecuteSparkJob for {} and flowfile: {} completed with failed status {} ",
          new Object[] { context.getName(), flowFile, exitCode });
      flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed");
      session.transfer(flowFile, REL_FAILURE);
    } else {
      logger.info("ExecuteSparkJob for {} and flowfile: {} completed with success status {} ",
          new Object[] { context.getName(), flowFile, exitCode });
      flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Success");
      session.transfer(flowFile, REL_SUCCESS);
    }
  } catch (final Exception e) {
    logger.error("Unable to execute Spark job {},{}", new Object[] { flowFile, e.getMessage() }, e);
    flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed With Exception");
    flowFile = session.putAttribute(flowFile, "Spark Exception:", e.getMessage());
    session.transfer(flowFile, REL_FAILURE);
  }
}
From source file:com.machinepublishers.jbrowserdriver.JBrowserDriver.java
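JBrowserDriver attempts a graceful and then a forceful PID-based kill of the browser process, and falls back to Process.destroyForcibly() while the process is still alive.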
private void endProcess() {
  if (processEnded.compareAndSet(false, true)) {
    lock.expired.set(true);
    final Process proc = process.get();
    if (proc != null) {
      while (proc.isAlive()) {
        try {
          // Try to kill the process by PID: gracefully first, then forcefully.
          PidProcess pidProcess = Processes.newPidProcess(proc);
          try {
            if (!pidProcess.destroyGracefully().waitFor(10, TimeUnit.SECONDS)) {
              throw new RuntimeException();
            }
          } catch (Throwable t1) {
            if (!pidProcess.destroyForcefully().waitFor(10, TimeUnit.SECONDS)) {
              throw new RuntimeException();
            }
          }
        } catch (Throwable t2) {
          // Fall back to the JDK API if the PID-based kill fails.
          try {
            proc.destroyForcibly().waitFor(10, TimeUnit.SECONDS);
          } catch (Throwable t3) {
          }
        }
      }
    }
    FileUtils.deleteQuietly(tmpDir);
    synchronized (locks) {
      locks.remove(lock);
    }
    synchronized (waiting) {
      portGroupsActive.remove(configuredPortGroup.get());
      waiting.notifyAll();
    }
  }
}
From source file:com.stratio.qa.specs.CommonG.java
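This test utility runs a shell command, captures its output and exit status, and calls destroyForcibly() as a fallback if the process is still alive after destroy().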
/**
 * Runs a command locally.
 *
 * @param command command to be run locally
 */
public void runLocalCommand(String command) throws Exception {
  String result = "";
  String line;
  Process p;
  try {
    p = Runtime.getRuntime().exec(new String[] { "/bin/sh", "-c", command });
    p.waitFor();
  } catch (java.io.IOException e) {
    this.commandExitStatus = 1;
    this.commandResult = "Error";
    return;
  }
  BufferedReader input = new BufferedReader(new InputStreamReader(p.getInputStream()));
  while ((line = input.readLine()) != null) {
    result += line;
  }
  input.close();

  this.commandResult = result;
  this.commandExitStatus = p.exitValue();

  p.destroy();
  if (p.isAlive()) {
    p.destroyForcibly();
  }
}
From source file:org.wildfly.swarm.proc.Monitor.java
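WildFly Swarm's performance monitor boots an uberjar, probes it over HTTP, then destroys the process; the finally block forcibly destroys it if it is still alive.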
/**
 * Main test execution. Spawns an external process.
 *
 * @param iteration
 * @param file
 * @param httpCheck
 * @param collector
 */
private void runTest(int iteration, File file, String httpCheck, final Collector collector) {
  System.out.println("Testing " + file.getAbsolutePath() + ", iteration " + iteration);

  String id = file.getAbsolutePath();
  String uid = UUID.randomUUID().toString();
  Process process = null;
  int attempts = 0;
  try {
    Path workDir = Files.createDirectories(
        this.workDir.toPath().resolve(Paths.get(file.getName(), "iteration-" + iteration)));
    Path tmp = Files.createDirectory(workDir.resolve("tmp"));

    ProcessBuilder pb = new ProcessBuilder("java", "-Duid=" + uid,
        "-Djava.io.tmpdir=" + tmp.toAbsolutePath().toString(), "-jar", file.getAbsolutePath())
        .redirectOutput(workDir.resolve("stdout.txt").toFile())
        .redirectError(workDir.resolve("stderr.txt").toFile());

    final long s0 = System.currentTimeMillis();
    process = pb.start();

    final CloseableHttpClient httpClient = HttpClients.createDefault();

    while (true) {
      if (attempts >= NUM_CONNECTION_ATTEMPTS) {
        System.out.println("Max attempts reached, escaping sequence");
        break;
      }

      CloseableHttpResponse response = null;
      try {
        HttpGet request = new HttpGet(httpCheck);
        response = httpClient.execute(request);
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode == 200) {
          collector.onMeasurement(id, Measure.STARTUP_TIME, (double) (System.currentTimeMillis() - s0));
          warmup(httpClient, httpCheck);
          measureMemory(id, uid, collector);
          measureJarSize(id, file, collector);
          measureTmpDirSize(id, tmp, collector);
          break;
        } else if (statusCode == 404) {
          // this can happen during server boot, when the HTTP endpoint is already exposed
          // but the application is not yet deployed
        } else {
          System.err.println("Failed to execute HTTP check: " + statusCode);
          break;
        }
      } catch (HttpHostConnectException e) {
        // server not running yet
      } finally {
        if (response != null) {
          response.close();
        }
      }

      attempts++;
      Thread.sleep(MS_BETWEEN_ATTEMPTS);
    }

    httpClient.close();

    final long s1 = System.currentTimeMillis();
    process.destroy();
    boolean finished = process.waitFor(2, TimeUnit.SECONDS);
    if (finished) {
      collector.onMeasurement(id, Measure.SHUTDOWN_TIME, (double) (System.currentTimeMillis() - s1));
    }
  } catch (Throwable t) {
    t.printStackTrace();
  } finally {
    if (process != null && process.isAlive()) {
      process.destroyForcibly();
      try {
        process.waitFor(2, TimeUnit.SECONDS);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }
}
From source file:br.com.riselabs.cotonet.builder.commands.ExternalGitCommand.java
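This helper shells out to git blame or git reset, parses the porcelain output for conflict chunks, and calls destroyForcibly() on the git process before returning.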
/**
 * OBS: this method returns {@code null} when calling
 * '{@code git reset --hard}'.
 *
 * @return
 * @throws IOException
 */
public List<ConflictChunk<CommandLineBlameResult>> call() throws BlameException {
  Runtime run = Runtime.getRuntime();
  Process pr = null;
  String cmd = null;
  String[] env = {};
  BufferedReader buf;
  List<ConflictChunk<CommandLineBlameResult>> conflicts = null;
  int exitCode;
  try {
    switch (type) {
    case RESET:
      cmd = "git reset --hard";
      pr = run.exec(cmd, env, file);
      break;
    case BLAME:
    default:
      cmd = "git blame -p --line-porcelain";
      env = new String[1];
      // we need this to disable the pager
      env[0] = "GIT_PAGER=cat";
      pr = run.exec(cmd + " " + file, env, file.getParentFile());

      // parse output
      buf = new BufferedReader(new InputStreamReader(pr.getInputStream()));
      conflicts = new ArrayList<ConflictChunk<CommandLineBlameResult>>();
      final String CONFLICT_START = "<<<<<<<";
      final String CONFLICT_SEP = "=======";
      final String CONFLICT_END = ">>>>>>>";
      boolean addBlame = false;
      ConflictChunk<CommandLineBlameResult> conflict =
          new ConflictChunk<CommandLineBlameResult>(file.getCanonicalPath());
      CommandLineBlameResult bResult;
      bResult = new CommandLineBlameResult(file.getCanonicalPath());
      Blame<CommandLineBlameResult> cBlame;
      cBlame = new Blame<CommandLineBlameResult>(scenario.getLeft(), bResult);
      List<String> block;
      while ((block = readPorcelainBlock(buf)) != null) {
        String commit = block.get(0).split(" ")[0];
        // for (String line : block)
        //   System.out.println(line);
        Map<PKeys, String> data = getDataFromPorcelainBlock(block);
        String contentLine = data.get(PKeys.content);
        int n;
        if ((n = contentLine.trim().indexOf(" ")) == -1) {
          // line without blank space
          contentLine = contentLine.trim();
        } else {
          contentLine = contentLine.trim().substring(0, n);
        }
        if (contentLine.equals(CONFLICT_START)) {
          addBlame = true;
          continue;
        } else if (contentLine.equals(CONFLICT_SEP)) {
          addBlame = true;
          cBlame.setRevision(scenario.getLeft());
          conflict.setBase(scenario.getBase());
          conflict.setLeft(cBlame);
          bResult = new CommandLineBlameResult(file.getCanonicalPath());
          cBlame = new Blame<CommandLineBlameResult>(scenario.getRight(), bResult);
          continue;
        } else if (contentLine.equals(CONFLICT_END)) {
          conflict.setRight(cBlame);
          conflict.setLine(Integer.valueOf(data.get(PKeys.linenumber)));
          conflicts.add(conflict);
          addBlame = false;
          bResult = new CommandLineBlameResult(file.getCanonicalPath());
          cBlame = new Blame<CommandLineBlameResult>(scenario.getLeft(), bResult);
          //@gustavo added this line
          conflict = new ConflictChunk<CommandLineBlameResult>(file.getCanonicalPath());
        } else if (addBlame) {
          // we are in one of the conflicting chunks
          Integer linenumber = Integer.valueOf(data.get(PKeys.linenumber));
          contentLine = data.get(PKeys.content);
          String name = data.get(PKeys.authorname);
          String email = data.get(PKeys.authormail);
          DeveloperNode dev = new DeveloperNode(name, email);
          conflict.setLine(linenumber);
          bResult.addLineAuthor(linenumber, dev);
          bResult.addLineContent(linenumber, contentLine);
          bResult.addLineCommit(linenumber, commit);
          continue;
        }
      }
      buf.close();
      break;
    }
    /*
     * already finished to execute the process. now, we should process
     * the error output.
     */
    buf = new BufferedReader(new InputStreamReader(pr.getErrorStream()));
    String stdErr = IOUtils.toString(pr.getErrorStream(), StandardCharsets.UTF_8).trim();
    IOUtils.closeQuietly(pr.getInputStream());
    IOUtils.closeQuietly(pr.getErrorStream());
    IOUtils.closeQuietly(pr.getOutputStream());
    exitCode = pr.waitFor();
    buf.close();
    if (!stdErr.isEmpty()) {
      Logger.log(String.format("Execution of '%s' returned standard error output:%n%s", cmd, stdErr));
      throw new RuntimeException(String.format("Error on external call with exit code %d", pr.exitValue()));
    }
  } catch (IOException io) {
    try {
      throw new BlameException(file.getCanonicalPath(), "IO Exception", io);
    } catch (IOException e) {
    }
  } catch (InterruptedException ie) {
    // waitFor() exception
    exitCode = 666;
    try {
      throw new BlameException(file.getCanonicalPath(),
          String.format("Interrupted while waiting for '%s' to finish. Error code: '%s'", cmd, exitCode), ie);
    } catch (IOException io) {
    }
  } catch (RuntimeException re) {
    try {
      throw new BlameException(file.getCanonicalPath(), "Runtime Exception", re);
    } catch (IOException e) {
    }
  } finally {
    run.freeMemory();
  }
  pr.destroyForcibly();
  return conflicts;
}
From source file:io.hops.hopsworks.common.dao.tensorflow.config.TensorBoardProcessMgr.java
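Hopsworks starts a TensorBoard server through a launcher script and forcibly destroys the launcher process if an exception occurs during startup.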
/**
 * Start the TensorBoard process.
 *
 * @param project
 * @param user
 * @param hdfsUser
 * @param hdfsLogdir
 * @return
 * @throws IOException
 */
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public TensorBoardDTO startTensorBoard(Project project, Users user, HdfsUsers hdfsUser, String hdfsLogdir)
    throws IOException {

  String prog = settings.getHopsworksDomainDir() + "/bin/tensorboard.sh";
  Process process = null;
  Integer port = 0;
  BigInteger pid = null;
  String tbBasePath = settings.getStagingDir() + Settings.TENSORBOARD_DIRS + File.separator;
  String projectUserUniquePath = project.getName() + "_" + hdfsUser.getName();
  String tbPath = tbBasePath + DigestUtils.sha256Hex(projectUserUniquePath);
  String certsPath = "\"\"";

  File tbDir = new File(tbPath);
  if (tbDir.exists()) {
    for (File file : tbDir.listFiles()) {
      if (file.getName().endsWith(".pid")) {
        String pidContents = com.google.common.io.Files.readFirstLine(file, Charset.defaultCharset());
        try {
          pid = BigInteger.valueOf(Long.parseLong(pidContents));
          if (pid != null && ping(pid) == 0) {
            killTensorBoard(pid);
          }
        } catch (NumberFormatException nfe) {
          LOGGER.log(Level.WARNING,
              "Expected number in pidfile " + file.getAbsolutePath() + " got " + pidContents);
        }
      }
    }
    FileUtils.deleteDirectory(tbDir);
  }
  tbDir.mkdirs();

  DistributedFileSystemOps dfso = dfsService.getDfsOps();
  try {
    certsPath = tbBasePath + DigestUtils.sha256Hex(projectUserUniquePath + "_certs");
    File certsDir = new File(certsPath);
    certsDir.mkdirs();
    HopsUtils.materializeCertificatesForUserCustomDir(project.getName(), user.getUsername(),
        settings.getHdfsTmpCertDir(), dfso, certificateMaterializer, settings, certsPath);
  } catch (IOException ioe) {
    LOGGER.log(Level.SEVERE,
        "Failed in materializing certificates for " + hdfsUser + " in directory " + certsPath, ioe);
    HopsUtils.cleanupCertificatesForUserCustomDir(user.getUsername(), project.getName(),
        settings.getHdfsTmpCertDir(), certificateMaterializer, certsPath, settings);
  } finally {
    if (dfso != null) {
      dfsService.closeDfsClient(dfso);
    }
  }

  String anacondaEnvironmentPath = settings.getAnacondaProjectDir(project.getName());
  int retries = 3;

  while (retries > 0) {
    if (retries == 0) {
      throw new IOException(
          "Failed to start TensorBoard for project=" + project.getName() + ", user=" + user.getUid());
    }

    // use pidfile to kill any running servers
    port = ThreadLocalRandom.current().nextInt(40000, 59999);

    String[] command = new String[] { "/usr/bin/sudo", prog, "start", hdfsUser.getName(), hdfsLogdir, tbPath,
        port.toString(), anacondaEnvironmentPath, settings.getHadoopVersion(), certsPath,
        settings.getJavaHome() };

    LOGGER.log(Level.INFO, Arrays.toString(command));
    ProcessBuilder pb = new ProcessBuilder(command);

    try {
      // Send both stdout and stderr to the same stream
      pb.redirectErrorStream(true);
      process = pb.start();

      synchronized (pb) {
        try {
          // Wait until the launcher bash script has finished
          process.waitFor(20L, TimeUnit.SECONDS);
        } catch (InterruptedException ex) {
          LOGGER.log(Level.SEVERE, "Woken while waiting for the TensorBoard to start: {0}", ex.getMessage());
        }
      }

      int exitValue = process.exitValue();
      String pidPath = tbPath + File.separator + port + ".pid";
      File pidFile = new File(pidPath);

      // Read the pid for TensorBoard server
      if (pidFile.exists()) {
        String pidContents = com.google.common.io.Files.readFirstLine(pidFile, Charset.defaultCharset());
        pid = BigInteger.valueOf(Long.parseLong(pidContents));
      }

      if (exitValue == 0 && pid != null) {
        int maxWait = 10;
        String logFilePath = tbPath + File.separator + port + ".log";
        File logFile = new File(logFilePath);

        while (maxWait > 0) {
          String logFileContents = com.google.common.io.Files.readFirstLine(logFile, Charset.defaultCharset());
          // It is not possible to have a fixed wait time before showing the TB, we need to be sure it has started
          if (logFile.length() > 0
              && (logFileContents.contains("Loaded") | logFileContents.contains("Reloader")
                  | logFileContents.contains("event"))
              | maxWait == 1) {
            Thread.currentThread().sleep(5000);
            TensorBoardDTO tensorBoardDTO = new TensorBoardDTO();
            String host = null;
            try {
              host = InetAddress.getLocalHost().getHostAddress();
            } catch (UnknownHostException ex) {
              Logger.getLogger(TensorBoardProcessMgr.class.getName()).log(Level.SEVERE, null, ex);
            }
            tensorBoardDTO.setEndpoint(host + ":" + port);
            tensorBoardDTO.setPid(pid);
            return tensorBoardDTO;
          } else {
            Thread.currentThread().sleep(1000);
            maxWait--;
          }
        }

        TensorBoardDTO tensorBoardDTO = new TensorBoardDTO();
        tensorBoardDTO.setPid(pid);
        String host = null;
        try {
          host = InetAddress.getLocalHost().getHostAddress();
        } catch (UnknownHostException ex) {
          Logger.getLogger(TensorBoardProcessMgr.class.getName()).log(Level.SEVERE, null, ex);
        }
        tensorBoardDTO.setEndpoint(host + ":" + port);
        return tensorBoardDTO;
      } else {
        LOGGER.log(Level.SEVERE, "Failed starting TensorBoard got exitcode " + exitValue + " retrying on new port");
        if (pid != null) {
          this.killTensorBoard(pid);
        }
        pid = null;
      }
    } catch (Exception ex) {
      LOGGER.log(Level.SEVERE, "Problem starting TensorBoard: {0}", ex);
      if (process != null) {
        process.destroyForcibly();
      }
    } finally {
      retries--;
    }
  }

  // Failed to start TensorBoard, make sure there is no process running for it! (This should not be needed)
  if (pid != null && this.ping(pid) == 0) {
    this.killTensorBoard(pid);
  }

  // Certificates cleanup in case they were materialized but no TB started successfully
  dfso = dfsService.getDfsOps();
  certsPath = tbBasePath + DigestUtils.sha256Hex(projectUserUniquePath + "_certs");
  File certsDir = new File(certsPath);
  certsDir.mkdirs();
  try {
    HopsUtils.cleanupCertificatesForUserCustomDir(user.getUsername(), project.getName(),
        settings.getHdfsTmpCertDir(), certificateMaterializer, certsPath, settings);
  } finally {
    if (dfso != null) {
      dfsService.closeDfsClient(dfso);
    }
  }
  return null;
}