// Java tutorial
/* * Copyright (C) 2015 University of Pittsburgh. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301 USA */ package edu.pitt.dbmi.ccd.queue.service; import edu.pitt.dbmi.ccd.connection.SlurmClient; import edu.pitt.dbmi.ccd.connection.slurm.JobStat; import edu.pitt.dbmi.ccd.connection.slurm.JobStatus; import edu.pitt.dbmi.ccd.db.entity.HpcParameter; import edu.pitt.dbmi.ccd.db.entity.JobQueueInfo; import edu.pitt.dbmi.ccd.db.entity.UserAccount; import edu.pitt.dbmi.ccd.db.service.JobQueueInfoService; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; import java.util.LinkedList; import java.util.List; import java.util.Properties; import java.util.Set; import java.util.concurrent.Future; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Profile; import org.springframework.scheduling.annotation.Async; import org.springframework.scheduling.annotation.AsyncResult; import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.stereotype.Service; /** * * May 24, 2016 6:08:41 PM * * @author Chirayu (Kong) Wongchokprasitti * */ @Profile("slurm") @Service @EnableAsync public class 
AlgorithmSlurmService { private static final Logger LOGGER = LoggerFactory.getLogger(AlgorithmSlurmService.class); private final JobQueueInfoService jobQueueInfoService; private final SlurmClient client; private final String workspace; private final String dataFolder; private final String tempFolder; private final String resultFolder; private final String algorithmResultFolder; private final String jobTemplates; private final String checkUserDir; private final String causalJob; private final String remotedataspace; private final String remoteworkspace; private final String checkUserDirScript; private final String runSlurmJobScript; private final String hpcPartition; private final int hpcWallTime; @Autowired(required = true) public AlgorithmSlurmService(@Value("${ccd.server.workspace}") String workspace, @Value("${ccd.folder.data:data}") String dataFolder, @Value("${ccd.folder.tmp:tmp}") String tempFolder, @Value("${ccd.folder.results:results}") String resultFolder, @Value("${ccd.folder.results.algorithm:algorithm}") String algorithmResultFolder, @Value("${ccd.folder.job_templates}") String jobTemplates, @Value("${ccd.template.checkuserdir}") String checkUserDir, @Value("${ccd.template.causaljob}") String causalJob, @Value("${ccd.remote.server.dataspace}") String remotedataspace, @Value("${ccd.remote.server.workspace}") String remoteworkspace, @Value("${ccd.script.checkuserdir:checkUserDir.sh}") String checkUserDirScript, @Value("${ccd.script.runslurmjob:runSlurmJobScript.sh}") String runSlurmJobScript, @Value("${ccd.hpc.partition:RM}") String hpcPartition, @Value("${ccd.hpc.wall.time:1}") int hpcWallTime, JobQueueInfoService queuedJobInfoService) { this.jobQueueInfoService = queuedJobInfoService; this.client = new SlurmClient(); this.workspace = workspace; this.dataFolder = dataFolder; this.tempFolder = tempFolder; this.resultFolder = resultFolder; this.algorithmResultFolder = algorithmResultFolder; this.jobTemplates = jobTemplates; this.checkUserDir = 
checkUserDir; this.causalJob = causalJob; this.remotedataspace = remotedataspace; this.remoteworkspace = remoteworkspace; this.checkUserDirScript = checkUserDirScript; this.runSlurmJobScript = runSlurmJobScript; this.hpcPartition = hpcPartition; this.hpcWallTime = hpcWallTime; } public JobStat getJobStat(Long jobId) { JobStat stat = null; try { stat = client.getJobStat(jobId); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return stat; } public List<JobStatus> getFinishedJobs() { List<JobStatus> jobs = null; try { jobs = client.getFinishedJobs(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return jobs; } private void deleteRunSlurmScript(JobQueueInfo jobQueueInfo) { Long queueId = jobQueueInfo.getId(); Set<UserAccount> userAccounts = jobQueueInfo.getUserAccounts(); UserAccount userAccount = (UserAccount) userAccounts.toArray()[0]; String username = userAccount.getUsername(); Path scriptPath = Paths.get(remoteworkspace, username, runSlurmJobScript); String scriptDir = scriptPath.toAbsolutePath().toString() + queueId + ".sh"; if (client.remoteFileExistes(scriptDir)) { client.deleteRemoteFile(scriptDir); } } @Async public Future<Void> cancelSlurmJob(JobQueueInfo jobQueueInfo) { Long jobId = jobQueueInfo.getPid(); try { client.cancelJob(jobId); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } deleteRunSlurmScript(jobQueueInfo); return new AsyncResult<>(null); } public Future<Void> downloadJobResult(JobQueueInfo jobQueueInfo) { String fileName = jobQueueInfo.getFileName() + ".txt"; String tmpDirectory = jobQueueInfo.getTmpDirectory(); String outputDirectory = jobQueueInfo.getOutputDirectory(); Path src = Paths.get(tmpDirectory, fileName); Path dest = Paths.get(outputDirectory, fileName); String jsonFileName = jobQueueInfo.getFileName() + ".json"; Path json = Paths.get(tmpDirectory, jsonFileName); Path jsonDest = Paths.get(outputDirectory, jsonFileName); String 
errorFileName = String.format("error_%s", fileName); Path error = Paths.get(tmpDirectory, errorFileName); Path errorDest = Paths.get(outputDirectory, errorFileName); try { LOGGER.info("Checking File: " + src.toAbsolutePath().toString()); if (client.remoteFileExistes(src.toAbsolutePath().toString())) { LOGGER.info("Downloading File: " + src.toAbsolutePath().toString()); client.downloadOutput(src.toAbsolutePath().toString(), dest.toAbsolutePath().toString()); client.deleteRemoteFile(src.toAbsolutePath().toString()); LOGGER.info("Checking File: " + json.toAbsolutePath().toString()); if (client.remoteFileExistes(json.toAbsolutePath().toString())) { LOGGER.info("Downloading File: " + json.toAbsolutePath().toString()); client.downloadOutput(json.toAbsolutePath().toString(), jsonDest.toAbsolutePath().toString()); client.deleteRemoteFile(json.toAbsolutePath().toString()); } } else if (client.remoteFileExistes(error.toAbsolutePath().toString())) { LOGGER.info("Downloading File: " + error.toAbsolutePath().toString()); client.downloadOutput(error.toAbsolutePath().toString(), errorDest.toAbsolutePath().toString()); } client.deleteRemoteFile(error.toAbsolutePath().toString()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } deleteRunSlurmScript(jobQueueInfo); return new AsyncResult<>(null); } public Future<Void> submitJobtoSlurm(JobQueueInfo jobQueueInfo) { Long queueId = jobQueueInfo.getId(); String fileName = jobQueueInfo.getFileName() + ".txt"; String commands = jobQueueInfo.getCommands(); String tmpDirectory = jobQueueInfo.getTmpDirectory(); Properties p = new Properties(); // Upload dataset(s) to the remote data storage Set<UserAccount> userAccounts = jobQueueInfo.getUserAccounts(); UserAccount userAccount = (UserAccount) userAccounts.toArray()[0]; String username = userAccount.getUsername(); Path checkUserDirTemplate = Paths.get(workspace, jobTemplates, checkUserDir); String checkUserDirTemplateDir = 
checkUserDirTemplate.toAbsolutePath().toString(); p.setProperty("causalUser", username); p.setProperty("tmp", tempFolder); p.setProperty("results", resultFolder); p.setProperty("algorithm", algorithmResultFolder); String partition = hpcPartition; int walltime = hpcWallTime; Set<HpcParameter> hpcParameters = jobQueueInfo.getHpcParameters(); if (hpcParameters != null && !hpcParameters.isEmpty()) { for (HpcParameter param : hpcParameters) { if (param.getParameterKey().equalsIgnoreCase("partition")) { partition = param.getParameterValue(); } String key = param.getParameterKey(); String value = param.getParameterValue(); switch (key) { case "partition": partition = value; break; case "walltime": walltime = Integer.parseInt(value); break; } } } p.setProperty("partition", partition); p.setProperty("walltime", String.format("%02d:00:00", walltime)); List<String> cmdList = new LinkedList<>(); cmdList.addAll(Arrays.asList(commands.split(";"))); String datasets = null; for (int i = 0; i < cmdList.size(); i++) { String cmd = cmdList.get(i); if (cmd.equalsIgnoreCase("--data")) { datasets = cmdList.get(i + 1); break; } } List<String> datasetList = new LinkedList<>(); datasetList.addAll(Arrays.asList(datasets.split(","))); // The current dataset path is the one on the grid datasetList.forEach(dataset -> { // Extract fileName from the dataset Path dataPath = Paths.get(remotedataspace, username, dataFolder); String dataFile = dataset.replace(dataPath.toAbsolutePath().toString(), ""); //The dataset's source path dataPath = Paths.get(workspace, username, dataFolder, dataFile); Path scriptPath = Paths.get(remoteworkspace, checkUserDirScript); String scriptDir = scriptPath.toAbsolutePath().toString() + username + ".sh"; LOGGER.info("submitJobtoSlurm: checkUserDirScript: " + scriptDir); try { client.uploadFile(checkUserDirTemplateDir, p, scriptDir, dataPath.toAbsolutePath().toString(), dataset); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }); String 
knowledges = null; for (int i = 0; i < cmdList.size(); i++) { String cmd = cmdList.get(i); if (cmd.equalsIgnoreCase("--knowledge")) { knowledges = cmdList.get(i + 1); break; } } if (knowledges != null) { List<String> knowledgeList = new LinkedList<>(); knowledgeList.addAll(Arrays.asList(knowledges.split(","))); knowledgeList.forEach(knowledge -> { // Extract fileName from the knowledge path Path knowledgePath = Paths.get(remotedataspace, username, dataFolder); String knowledgeFile = knowledge.replace(knowledgePath.toAbsolutePath().toString(), ""); //The knowledge's source path knowledgePath = Paths.get(workspace, username, dataFolder, knowledgeFile); Path scriptPath = Paths.get(remoteworkspace, checkUserDirScript); String scriptDir = scriptPath.toAbsolutePath().toString() + username + ".sh"; LOGGER.info("submitJobtoSlurm: checkUserDirScript: " + scriptDir); try { client.uploadFile(checkUserDirTemplateDir, p, scriptDir, knowledgePath.toAbsolutePath().toString(), knowledge); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }); } cmdList.add("--out"); cmdList.add(tmpDirectory); String errorFileName = String.format("error_%s", fileName); Path error = Paths.get(tmpDirectory, errorFileName); // Redirect Error to File cmdList.add("2>"); cmdList.add(error.toAbsolutePath().toString()); StringBuilder sb = new StringBuilder(); cmdList.forEach(cmd -> { sb.append(cmd); sb.append(" "); }); LOGGER.info("Algorithm command: " + sb.toString()); try { // Submit a job & Get remote job Id p.setProperty("email", userAccount.getPerson().getEmail()); p.setProperty("cmd", sb.toString()); Path causalJobTemplate = Paths.get(workspace, jobTemplates, causalJob); String causalJobTemplateDir = causalJobTemplate.toAbsolutePath().toString(); Path scriptPath = Paths.get(remoteworkspace, username, runSlurmJobScript); String scriptDir = scriptPath.toAbsolutePath().toString() + queueId + ".sh"; LOGGER.info("submitJobtoSlurm: runSlurmJobScript: " + scriptDir); long pid 
= client.submitJob(causalJobTemplateDir, p, scriptDir); JobQueueInfo queuedJobInfo = jobQueueInfoService.findOne(queueId); LOGGER.info("Set Job's pid to be: " + pid); queuedJobInfo.setPid(pid); jobQueueInfoService.saveJobIntoQueue(queuedJobInfo); } catch (Exception exception) { LOGGER.error("Algorithm did not run successfully.", exception); } return new AsyncResult<>(null); } }