// Java tutorial
/* * Copyright 2012 AMG.lab, a Bull Group Company * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.xlcloud.xsa.ext.hpc.service.impl; import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; import javax.inject.Inject; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.builder.ToStringBuilder; import org.apache.commons.lang.builder.ToStringStyle; import org.apache.log4j.Logger; import org.xlcloud.config.ConfigParam; import org.xlcloud.rest.exception.InternalErrorException; import org.xlcloud.rest.exception.ObjectNotFoundException; import org.xlcloud.rest.exception.ValidationException; import org.xlcloud.xsa.Application; import org.xlcloud.xsa.ExecParams; import org.xlcloud.xsa.HpcJob; import org.xlcloud.xsa.HpcJobSubmission; import org.xlcloud.xsa.ext.hpc.service.ApplicationsManager; import org.xlcloud.xsa.ext.hpc.service.HpcJobsManager; import org.xlcloud.xsa.ext.hpc.service.parser.JobAccountingListOutputParser; import org.xlcloud.xsa.ext.hpc.service.parser.JobAccountingOutputParser; import org.xlcloud.xsa.ext.hpc.service.parser.JobDetailsOutputParser; import org.xlcloud.xsa.ext.hpc.service.parser.JobSubmissionOutputParser; import org.xlcloud.xsa.ext.hpc.service.parser.JobTerminationOutputParser; import org.xlcloud.xsa.ext.hpc.service.parser.JobsListOutputParser; import 
org.xlcloud.xsa.ext.hpc.service.process.ProcessExecutionResult;
import org.xlcloud.xsa.ext.hpc.service.process.ProcessExecutor;

import ch.lambdaj.Lambda;
import ch.lambdaj.function.convert.Converter;

/**
 * {@link HpcJobsManager} implementation that works by issuing SLURM
 * command-line tools ({@code sbatch}, {@code scontrol}, {@code sacct} and
 * {@code scancel}) through the injected {@link ProcessExecutor} and handing
 * the results to the injected output parsers.
 *
 * @author Krzysztof Szafraski, AMG.net
 */
public class SlurmHpcJobsManager implements HpcJobsManager {

    private static final Logger LOG = Logger.getLogger(SlurmHpcJobsManager.class);

    /** Column selection passed to every {@code sacct} invocation made by this class. */
    private static final String SACCT_FORMAT = "--format=JobID,JobName,user,state,ExitCode,submit";

    @Inject
    private ApplicationsManager applicationsManager;

    @Inject
    private ProcessExecutor executor;

    @Inject
    private JobSubmissionOutputParser jobSubmissionParser;

    @Inject
    private JobDetailsOutputParser jobDetailsParser;

    @Inject
    private JobAccountingOutputParser jobAccountingParser;

    @Inject
    private JobsListOutputParser jobsListParser;

    @Inject
    private JobAccountingListOutputParser jobAccountingListOutputParser;

    @Inject
    private JobTerminationOutputParser jobTerminateOutputParser;

    /** Base directory under which the per-application working directories are resolved. */
    @Inject
    @ConfigParam
    private String workDir;

    /**
     * Submits a job for the named application with {@code sbatch}.
     * <p>
     * The application is looked up by name and the submission is validated
     * before the command is assembled. The command carries the job name
     * ({@code -J}), the node and core counts when execution parameters are
     * present ({@code -N}, {@code -c}), and a working directory ({@code -D})
     * formed from the configured work directory and the application name. The
     * script lines (a {@code #!/bin/sh} line followed by the application's
     * commands) are passed to the executor together with the command.
     *
     * @param applicationName name of the application whose commands are run
     * @param jobSubmission submission details (job name, execution parameters)
     * @return the job obtained by calling {@link #get(String)} with the id
     *         parsed from the submission output
     * @throws ValidationException if the submission is {@code null} or the
     *         application has no commands
     * @throws InternalErrorException declared by the interface contract
     * @throws ObjectNotFoundException declared by the interface contract
     */
    @Override
    public HpcJob schedule(String applicationName, HpcJobSubmission jobSubmission)
            throws ValidationException, InternalErrorException, ObjectNotFoundException {
        if (LOG.isDebugEnabled()) {
            String submissionDump = ToStringBuilder.reflectionToString(jobSubmission, ToStringStyle.SHORT_PREFIX_STYLE);
            LOG.debug("Trying to schedule job, got submission: " + submissionDump);
        }

        Application application = applicationsManager.get(applicationName);
        validateJobSubmission(application, jobSubmission);

        List<String> sbatchCommand = new ArrayList<>();
        sbatchCommand.add("sbatch");
        addOption(sbatchCommand, "-J", jobSubmission.getName());

        ExecParams execParams = jobSubmission.getExecParams();
        if (execParams != null) {
            addOption(sbatchCommand, "-N", execParams.getNumNodes());
            addOption(sbatchCommand, "-c", execParams.getNumCores());
        }

        // Join the base directory and the application name with exactly one separator.
        String appWorkDir = workDir;
        if (!StringUtils.endsWith(workDir, File.separator)) {
            appWorkDir += File.separator;
        }
        appWorkDir += applicationName;
        addOption(sbatchCommand, "-D", appWorkDir);

        List<String> scriptLines = new ArrayList<>();
        scriptLines.add("#!/bin/sh");
        scriptLines.addAll(application.getCommands());

        ProcessExecutionResult submissionResult = executor.run(sbatchCommand, scriptLines);
        String jobId = jobSubmissionParser.parse(submissionResult);
        return get(jobId);
    }

    /**
     * Appends an option and its value to the command, unless the value is
     * {@code null}, in which case the command is left untouched.
     *
     * @param command command argument list to extend
     * @param option option switch, e.g. {@code -J}
     * @param value option value; rendered with {@code toString()}
     */
    private void addOption(List<String> command, String option, Object value) {
        if (value == null) {
            return;
        }
        command.add(option);
        command.add(value.toString());
    }

    /**
     * Rejects a missing submission and an application without commands.
     *
     * @param application application the job is submitted for
     * @param jobSubmission submission to check
     * @throws ValidationException if {@code jobSubmission} is {@code null} or
     *         the application's command list is empty
     * @throws InternalErrorException declared for callers; not thrown directly here
     */
    private void validateJobSubmission(Application application, HpcJobSubmission jobSubmission)
            throws ValidationException, InternalErrorException {
        if (jobSubmission == null) {
            String nullSubmissionMessage = "HpcJobSubmission cannot be null";
            LOG.warn(nullSubmissionMessage);
            throw new ValidationException(nullSubmissionMessage);
        }

        if (application.getCommands().isEmpty()) {
            String noCommandsMessage = "Applications has no commands";
            LOG.info(noCommandsMessage);
            throw new ValidationException(noCommandsMessage);
        }
    }

    /**
     * Retrieves a single job by id.
     * <p>
     * {@code scontrol show job} is consulted first. When the details parser
     * reports the job as not found, {@code sacct} is queried instead and its
     * output is parsed by the accounting parser.
     *
     * @param id job identifier
     * @return the job parsed from the {@code scontrol} output, or from the
     *         {@code sacct} output as a fallback
     */
    @Override
    public HpcJob get(String id) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Trying to get details of job with id=" + id);
        }

        ProcessExecutionResult queueResult = executor.run(Arrays.asList("scontrol", "show", "job", id));
        try {
            return jobDetailsParser.parse(queueResult);
        } catch (ObjectNotFoundException e) {
            LOG.debug("Job not found on queue. Searching in accounting database.");
            List<String> sacctCommand = Arrays.asList("sacct", "-j", id, "-n", "-p", SACCT_FORMAT);
            ProcessExecutionResult accountingResult = executor.run(sacctCommand);
            return jobAccountingParser.parse(accountingResult);
        }
    }

    /**
     * Lists jobs reported by {@code scontrol show jobs} together with jobs
     * reported by {@code sacct} for the states {@code CA,CD,F}, sorted by job
     * id.
     *
     * @return the combined job list, ordered by id
     */
    @Override
    public List<HpcJob> list() {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Trying to to list jobs");
        }

        ProcessExecutionResult queueResult = executor.run(Arrays.asList("scontrol", "show", "jobs"));
        List<HpcJob> jobs = jobsListParser.parse(queueResult);

        List<String> sacctCommand = Arrays.asList("sacct", "-a", "-n", "-p", SACCT_FORMAT, "--state=CA,CD,F");
        ProcessExecutionResult accountingResult = executor.run(sacctCommand);
        List<HpcJob> accountingJobs = jobAccountingListOutputParser.parse(accountingResult);

        /*
         * scontrol keeps returning job details for some time after a job has
         * failed or completed, so the same job can show up in both the
         * scontrol and the sacct output. When that happens only the scontrol
         * entry is kept.
         */
        Set<Integer> queuedIds = new HashSet<>(Lambda.convert(jobs, new JobIdExtractor()));
        for (HpcJob candidate : accountingJobs) {
            boolean alreadyListed = queuedIds.contains(candidate.getId());
            if (!alreadyListed) {
                jobs.add(candidate);
            }
        }

        Collections.sort(jobs, new JobIdComparator());
        return jobs;
    }

    /**
     * Terminates a job by running {@code scancel --signal=KILL} for the given
     * id and passing the result to the termination output parser.
     *
     * @param id job identifier
     */
    @Override
    public void terminate(String id) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Trying to terminate job with id=" + id);
        }

        List<String> scancelCommand = Arrays.asList("scancel", "--signal=KILL", id);
        ProcessExecutionResult cancelResult = executor.run(scancelCommand);
        jobTerminateOutputParser.parse(cancelResult);
    }

    /** Maps a job to its id. */
    private static final class JobIdExtractor implements Converter<HpcJob, Integer> {

        @Override
        public Integer convert(HpcJob from) {
            return from.getId();
        }
    }

    /** Orders jobs by ascending id. */
    private static final class JobIdComparator implements Comparator<HpcJob> {

        @Override
        public int compare(HpcJob left, HpcJob right) {
            return left.getId().compareTo(right.getId());
        }
    }
}