org.niord.core.batch.BatchService.java Source code

Java tutorial

Introduction

Here is the source code for org.niord.core.batch.BatchService.java

Source

/*
 * Copyright 2016 Danish Maritime Authority.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.niord.core.batch;

import com.fasterxml.jackson.core.type.TypeReference;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.niord.core.batch.vo.BatchExecutionVo;
import org.niord.core.batch.vo.BatchInstanceVo;
import org.niord.core.batch.vo.BatchStatusVo;
import org.niord.core.batch.vo.BatchTypeVo;
import org.niord.core.domain.DomainService;
import org.niord.core.sequence.DefaultSequence;
import org.niord.core.sequence.Sequence;
import org.niord.core.sequence.SequenceService;
import org.niord.core.service.BaseService;
import org.niord.core.settings.Setting.Type;
import org.niord.core.settings.annotation.Setting;
import org.niord.core.user.UserService;
import org.niord.core.util.JsonUtils;
import org.niord.model.search.PagedSearchResultVo;
import org.slf4j.Logger;

import javax.batch.operations.JobOperator;
import javax.batch.operations.NoSuchJobException;
import javax.ejb.Schedule;
import javax.ejb.Stateless;
import javax.inject.Inject;
import java.io.*;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
 * Provides an interface for managing batch jobs.
 * <p>
 * Batch jobs have up to three associated batch job folders under the "batchJobRoot" path:
 * <ul>
 *  <li>
 *      <b>[jobName]/in</b>:
 *      The <b>in</b> folders will be monitored periodically, and any file placed in this folder will
 *      result in the <b>jobName</b> batch job being executed.
 *  </li>
 *  <li>
 *      <b>[jobName]/execution/[year]/[month]/[jobNo]</b>:
 *      Stores any input file associated with a batch job along with log files for the executions steps.
 *      These directories will be cleaned up after a configurable amount of time.
 *  </li>
 *  <li>
 *      <b>[jobName]/out</b>:
 *      Any file-based result from the execution of a batch job can be placed here.
 *      Not implemented yet.
 *  </li>
 * </ul>
 * <p>
 *
 * Note to self: Currently, there is a bug in Wildfly, so that the batch job xml files cannot be loaded
 * from an included jar.
 * Hence, move the xml files to META-INF/batch-jobs of the web application you are working on.
 *
 * @see <a href="https://issues.jboss.org/browse/WFLY-4988">Error report</a>
 * @see <a href="https://github.com/NiordOrg/niord-dk/tree/master/niord-dk-web">Example solution</a>
 */
@Stateless
@SuppressWarnings("unused")
public class BatchService extends BaseService {

    public static final String BATCH_REPO_FOLDER = "batch";
    public static final String BATCH_JOB_ENTITY = "batchJobEntity";

    @Inject
    private Logger log;

    @Inject
    UserService userService;

    @Inject
    DomainService domainService;

    @Inject
    JobOperator jobOperator;

    @Inject
    SequenceService sequenceService;

    @Inject
    @Setting(value = "batchJobRootPath", defaultValue = "${niord.home}/batch-jobs", description = "The root directory of the Niord batch jobs")
    private Path batchJobRoot;

    // Delete execution files after 10 days
    @Inject
    @Setting(value = "batchFileExpiryDays", defaultValue = "10", description = "Number of days after which batch job files are deleted", type = Type.Integer)
    private int batchFileExpiryDays;

    /****************************/
    /** Starting batch jobs    **/
    /****************************/

    /**
     * Starts a new batch job
     *
     * @param job the batch job name
     */
    public long startBatchJob(BatchData job) {

        // Note to self:
        // There are some transaction issues with storing the BatchData in the current transaction,
        // so, we leave it to the BatchJobListener.

        // Launch the batch job
        Properties props = new Properties();
        props.put(BATCH_JOB_ENTITY, job);
        long executionId = jobOperator.start(job.getJobName(), props);

        log.info("Started batch job: " + job);
        return executionId;
    }

    /**
     * Creates and initializes a new batch job data entity
     */
    private BatchData initBatchData(String jobName, Map<String, Object> properties) throws IOException {
        // Construct a new batch data entity
        BatchData job = new BatchData();
        job.setUser(userService.currentUser());
        job.setDomain(domainService.currentDomain());
        job.setJobName(jobName);
        job.setJobNo(getNextJobNo(jobName));
        if (properties != null) {
            job.getProperties().putAll(properties);
        }
        return job;
    }

    /**
     * Starts a new batch job
     *
     * @param jobName the batch job name
     */
    public long startBatchJobWithDeflatedData(String jobName, Object data, String dataFileName,
            Map<String, Object> properties) throws Exception {

        BatchData job = initBatchData(jobName, properties);

        if (data != null) {
            dataFileName = StringUtils.isNotBlank(dataFileName) ? dataFileName : "batch-data.zip";
            job.setDataFileName(dataFileName);
            Path path = computeBatchJobPath(job.computeDataFilePath());
            createDirectories(path.getParent());
            try (FileOutputStream file = new FileOutputStream(path.toFile());
                    GZIPOutputStream gzipOut = new GZIPOutputStream(file);
                    ObjectOutputStream objectOut = new ObjectOutputStream(gzipOut)) {
                objectOut.writeObject(data);
            }
        }

        return startBatchJob(job);
    }

    /**
     * Starts a new batch job
     *
     * @param jobName the batch job name
     */
    public long startBatchJobWithJsonData(String jobName, Object data, String dataFileName,
            Map<String, Object> properties) throws Exception {

        BatchData job = initBatchData(jobName, properties);

        if (data != null) {
            dataFileName = StringUtils.isNotBlank(dataFileName) ? dataFileName : "batch-data.json";
            job.setDataFileName(dataFileName);
            Path path = computeBatchJobPath(job.computeDataFilePath());
            createDirectories(path.getParent());
            JsonUtils.writeJson(data, path);
        }

        return startBatchJob(job);
    }

    /**
     * Starts a new file-based batch job
     *
     * @param jobName the batch job name
     */
    public long startBatchJobWithDataFile(String jobName, InputStream in, String dataFileName,
            Map<String, Object> properties) throws IOException {

        BatchData job = initBatchData(jobName, properties);

        if (in != null) {
            job.setDataFileName(dataFileName);
            Path path = computeBatchJobPath(job.computeDataFilePath());
            createDirectories(path.getParent());
            Files.copy(in, path, StandardCopyOption.REPLACE_EXISTING);
        }

        return startBatchJob(job);
    }

    /**
     * Starts a new file-based batch job
     *
     * @param jobName the batch job name
     */
    public long startBatchJobWithDataFile(String jobName, Path file, Map<String, Object> properties)
            throws IOException {
        if (!Files.isRegularFile(file)) {
            throw new IllegalArgumentException("Invalid file " + file);
        }

        try (InputStream in = new FileInputStream(file.toFile())) {
            return startBatchJobWithDataFile(jobName, in, file.getFileName().toString(), properties);
        }
    }

    /**
     * Returns the next sequence number for the batch job
     *
     * @param jobName the name of the batch job
     * @return the next sequence number for the batch job
     */
    private Long getNextJobNo(String jobName) {
        Sequence jobSequence = new DefaultSequence("BATCH_JOB_" + jobName, 1);
        return sequenceService.nextValue(jobSequence);
    }

    /****************************/
    /** Batch job repository   **/
    /****************************/

    /**
     * Creates the given directories if they do not exist
     */
    private Path createDirectories(Path path) throws IOException {
        if (path != null && !Files.exists(path)) {
            Files.createDirectories(path);
        }
        return path;
    }

    /**
     * Computes the absolute path to the given local path within the repository
     *
     * @param localPath the local path withing the batch job repository
     * @return the absolute path to the given local path within the repository
     */
    public Path computeBatchJobPath(Path localPath) {

        Path path = batchJobRoot.normalize().resolve(localPath);

        // Check that it is a valid path within the batch job repository root
        path = path.normalize();
        if (!path.startsWith(batchJobRoot)) {
            throw new RuntimeException("Invalid path " + localPath);
        }

        return path;
    }

    /**
     * Returns the data file associated with the given batch job instance.
     * Returns null if no data file is found
     *
     * @param instanceId the batch job instance
     * @return the data file associated with the given batch job instance.
     */
    public Path getBatchJobDataFile(Long instanceId) {
        BatchData job = findByInstanceId(instanceId);

        if (job == null || job.getDataFileName() == null) {
            return null;
        }

        Path path = computeBatchJobPath(job.computeDataFilePath());
        return Files.isRegularFile(path) ? path : null;
    }

    /**
     * Loads the batch job JSON data file as the given class.
     * Returns null if no data file is found
     *
     * @param instanceId the batch job instance
     * @return the data
     */
    public <T> T readBatchJobJsonDataFile(Long instanceId, Class<T> dataClass) throws IOException {
        Path path = getBatchJobDataFile(instanceId);

        return path != null ? JsonUtils.readJson(dataClass, path) : null;
    }

    /**
     * Loads the batch job JSON data file as the given class.
     * Returns null if no data file is found
     *
     * @param instanceId the batch job instance
     * @return the data
     */
    public <T> T readBatchJobJsonDataFile(Long instanceId, TypeReference typeRef) throws IOException {
        Path path = getBatchJobDataFile(instanceId);

        return path != null ? JsonUtils.readJson(typeRef, path) : null;
    }

    /**
     * Loads the batch job deflated data file as the given class.
     * Returns null if no data file is found
     *
     * @param instanceId the batch job instance
     * @return the data
     */
    @SuppressWarnings("unchecked")
    public <T> T readBatchJobDeflatedDataFile(Long instanceId) throws IOException, ClassNotFoundException {
        Path path = getBatchJobDataFile(instanceId);

        if (path != null) {
            try (FileInputStream file = new FileInputStream(path.toFile());
                    GZIPInputStream gzipIn = new GZIPInputStream(file);
                    ObjectInputStream objectIn = new ObjectInputStream(gzipIn)) {
                return (T) objectIn.readObject();
            }
        }

        return null;
    }

    /**
     * Returns the list of log names in the batch job directory
     * @param instanceId the instance id
     * @return the list of log names in the batch job directory
     */
    public List<String> getBatchJobLogFiles(Long instanceId) throws IOException {

        BatchData job = findByInstanceId(instanceId);
        if (job == null) {
            throw new IllegalArgumentException("Invalid batch instance ID " + instanceId);
        }

        Path path = computeBatchJobPath(job.computeBatchJobFolderPath());

        File[] files = path.toFile().listFiles();
        return files == null ? Collections.emptyList()
                : Arrays.stream(files)
                        .filter(f -> Files.isRegularFile(f.toPath())
                                && f.getName().matches(".+Log\\.txt(\\.\\d+)?"))
                        .map(File::getName).sorted().collect(Collectors.toList());
    }

    /**
     * Returns the contents of the log file with the given file name.
     * If fromLineNo is specified, only the subsequent lines are returned.
     *
     * @param instanceId the instance id
     * @param logFileName the log file name
     * @param fromLineNo if specified, only the subsequent lines are returned
     * @return the contents of the log file
     */
    public String getBatchJobLogFile(Long instanceId, String logFileName, Integer fromLineNo) throws IOException {

        BatchData job = findByInstanceId(instanceId);
        if (job == null) {
            throw new IllegalArgumentException("Invalid batch instance ID " + instanceId);
        }

        Path path = computeBatchJobPath(job.computeBatchJobFolderPath().resolve(logFileName));
        if (!Files.isRegularFile(path) || !Files.isReadable(path)) {
            throw new IllegalArgumentException("Invalid batch log file " + logFileName);
        }

        int skipLineNo = fromLineNo == null ? 0 : fromLineNo;
        try (Stream<String> logStream = Files.lines(path)) {
            return logStream.skip(skipLineNo).collect(Collectors.joining("\n"));
        }
    }

    /****************************/
    /** Utility methods        **/
    /****************************/

    /**
     * Returns the batch data entity with the given instance id.
     * Returns null if no batch data entity is not found.
     *
     * @param instanceId the instance id
     * @return the batch data entity with the given instance id
     */
    public BatchData findByInstanceId(Long instanceId) {
        try {
            return em.createNamedQuery("BatchData.findByInstanceId", BatchData.class)
                    .setParameter("instanceId", instanceId).getSingleResult();
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * Returns the batch data entities with the given instance ids.
     *
     * @param instanceIds the instance ids
     * @return the batch data entities with the given instance ids
     */
    public List<BatchData> findByInstanceIds(Set<Long> instanceIds) {
        return em.createNamedQuery("BatchData.findByInstanceIds", BatchData.class)
                .setParameter("instanceIds", instanceIds).getResultList();
    }

    /**
     * Creates or updates the batch job.
     *
     * @param job the batch job entity
     */
    public BatchData saveBatchJob(BatchData job) {
        Objects.requireNonNull(job, "Invalid job parameter");
        Objects.requireNonNull(job.getInstanceId(), "Invalid job instance ID");

        job = saveEntity(job);
        em.flush();
        return job;
    }

    /**
     * Updates progress (0-100) for the given batch job.
     *
     * @param instanceIds the batch job id
     * @param progress the progress
     */
    public void updateBatchJobProgress(Long instanceIds, Integer progress) {

        BatchData job = findByInstanceId(instanceIds);
        if (job != null) {
            job.setProgress(progress);
            saveEntity(job);
        }
    }

    /****************************/
    /** Managing batch jobs    **/
    /****************************/

    /**
     * Returns the batch job names
     *
     * @return the batch job names
     */
    @SuppressWarnings("unchecked")
    public List<String> getJobNames() {
        // Sadly, this gets reset upon every JVM restart
        /*
        return jobOperator.getJobNames()
            .stream()
            .sorted()
            .collect(Collectors.toList());
        */

        // Look up the names from the database
        //noinspection SqlDialectInspection
        return (List<String>) em
                .createNativeQuery("SELECT DISTINCT JOBNAME FROM JOB_INSTANCE ORDER BY lower(JOBNAME)")
                .getResultList();
    }

    /**
     * Stops the given batch job execution
     *
     * @param executionId the execution ID
     */
    public void stopExecution(long executionId) {
        jobOperator.stop(executionId);
    }

    /**
     * Restarts the given batch job execution
     *
     * @param executionId the execution ID
     */
    public long restartExecution(long executionId) {
        return jobOperator.restart(executionId, new Properties());
    }

    /**
     * Abandons the given batch job execution
     *
     * @param executionId the execution ID
     */
    public void abandonExecution(long executionId) {
        jobOperator.abandon(executionId);
    }

    /**
     * Returns the paged search result for the given batch type
     *
     * @param jobName the job name
     * @param start   the start index of the paged search result
     * @param count   the max number of instances per start
     * @return the paged search result for the given batch type
     */
    public PagedSearchResultVo<BatchInstanceVo> getJobInstances(String jobName, int start, int count)
            throws Exception {

        Objects.requireNonNull(jobName);
        PagedSearchResultVo<BatchInstanceVo> result = new PagedSearchResultVo<>();

        result.setTotal(jobOperator.getJobInstanceCount(jobName));

        jobOperator.getJobInstances(jobName, start, count).forEach(i -> {
            BatchInstanceVo instance = new BatchInstanceVo();
            instance.setName(i.getJobName());
            instance.setInstanceId(i.getInstanceId());
            result.getData().add(instance);
            jobOperator.getJobExecutions(i).forEach(e -> {
                BatchExecutionVo execution = new BatchExecutionVo();
                execution.setExecutionId(e.getExecutionId());
                execution.setBatchStatus(e.getBatchStatus());
                execution.setStartTime(e.getStartTime());
                execution.setEndTime(e.getEndTime());
                instance.getExecutions().add(execution);
            });
            instance.updateExecutions();
        });

        // Next, copy the associated batch data to the instance VO
        Set<Long> instanceIds = result.getData().stream().map(BatchInstanceVo::getInstanceId)
                .collect(Collectors.toSet());
        Map<Long, BatchData> batchDataLookup = findByInstanceIds(instanceIds).stream()
                .collect(Collectors.toMap(BatchData::getInstanceId, Function.identity()));
        for (BatchInstanceVo i : result.getData()) {
            BatchData data = batchDataLookup.get(i.getInstanceId());
            if (data != null) {
                i.setFileName(data.getDataFileName());
                i.setJobNo(data.getJobNo());
                i.setUser(data.getUser() != null ? data.getUser().getName() : null);
                i.setDomain(data.getDomain() != null ? data.getDomain().getDomainId() : null);
                i.setJobName(data.getJobName());
                i.setProperties(data.getProperties());
                i.setProgress(data.getProgress());
            }
        }

        result.updateSize();
        return result;
    }

    /**
     * Returns the status of the batch job system
     *
     * @return the status of the batch job system
     */
    public BatchStatusVo getStatus() {
        BatchStatusVo status = new BatchStatusVo();
        getJobNames().forEach(name -> {
            // Create a status for the batch type
            BatchTypeVo batchType = new BatchTypeVo();
            batchType.setName(name);
            try {
                batchType.setRunningExecutions(jobOperator.getRunningExecutions(name).size());
            } catch (NoSuchJobException ignored) {
                // When the JVM has restarted the call will fail until the job has executed the first time.
                // A truly annoying behaviour, given that we use persisted batch jobs.
            }
            batchType.setInstanceCount(jobOperator.getJobInstanceCount(name));

            // Update the global batch status with the batch type
            status.getTypes().add(batchType);
            status.setRunningExecutions(status.getRunningExecutions() + batchType.getRunningExecutions());
        });
        return status;
    }

    /***************************************/
    /** Batch "in" folder monitoring      **/
    /***************************************/

    /**
     * Called every minute to monitor the batch job "[jobName]/in" folders. If a file has been
     * placed in one of these folders, the cause the "jobName" batch job to be started.
     */
    @Schedule(persistent = false, second = "48", minute = "*/1", hour = "*/1")
    protected void monitorBatchJobInFolderInitiation() {

        // Resolve the list of batch job "in" folders
        List<Path> executionFolders = getBatchJobSubFolders("in");

        // Check for new batch-initiating files in each folder
        for (Path dir : executionFolders) {
            for (Path file : getDirectoryFiles(dir)) {
                String jobName = file.getParent().getParent().getFileName().toString();
                log.info("Found file " + file.getFileName() + " for batch job " + jobName);

                try {
                    startBatchJobWithDataFile(jobName, file, new HashMap<>());
                } catch (IOException e) {
                    log.error("Failed starting batch job " + jobName + " with file " + file.getFileName());
                } finally {
                    // Delete the file
                    // Note to self: Move to error folder?
                    try {
                        Files.delete(file);
                    } catch (IOException ignored) {
                    }
                }
            }
        }
    }

    /***************************************/
    /** Batch "execution" folder clean-up **/
    /***************************************/

    /**
     * Called every hour to clean up the batch job "[jobName]/execution" folders for expired files
     */
    @Schedule(persistent = false, second = "30", minute = "42", hour = "*/1")
    protected void cleanUpExpiredBatchJobFiles() {

        long t0 = System.currentTimeMillis();

        // Resolve the list of batch job "execution" folders
        List<Path> executionFolders = getBatchJobSubFolders("execution");

        // Compute the expiry time
        Calendar expiryDate = Calendar.getInstance();
        expiryDate.add(Calendar.DATE, -batchFileExpiryDays);

        // Clean up the files
        executionFolders.forEach(folder -> {
            try {
                Files.walkFileTree(folder, new SimpleFileVisitor<Path>() {
                    @Override
                    public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
                        if (isDirEmpty(dir)) {
                            log.debug("Deleting batch job directory :" + dir);
                            Files.delete(dir);
                        }
                        return FileVisitResult.CONTINUE;
                    }

                    @Override
                    public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                        if (FileUtils.isFileOlder(file.toFile(), expiryDate.getTime())) {
                            log.debug("Deleting batch job file      :" + file);
                            Files.delete(file);
                        }
                        return FileVisitResult.CONTINUE;
                    }
                });
            } catch (IOException e) {
                log.error("Failed cleaning up " + folder + " batch job directory: " + e.getMessage(), e);
            }
        });

        log.debug(String.format("Cleaned up expired batch job files in %d ms", System.currentTimeMillis() - t0));
    }

    /** Returns the batch job root **/
    public Path getBatchJobRoot() {
        return batchJobRoot;
    }

    /** Returns the named sub-folders **/
    private List<Path> getBatchJobSubFolders(String subFolderName) {
        List<Path> subFolders = new ArrayList<>();
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(batchJobRoot)) {
            stream.forEach(p -> {
                Path executionFolder = p.resolve(subFolderName);
                if (Files.isDirectory(executionFolder)) {
                    subFolders.add(executionFolder);
                }
            });
        } catch (IOException e) {
            log.debug("Failed finding '" + subFolderName + "' batch job folders: " + e);
        }
        return subFolders;
    }

    /** Returns the list of regular files in the given directory **/
    private List<Path> getDirectoryFiles(Path dir) {
        List<Path> files = new ArrayList<>();
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
            for (Path p : stream) {
                if (Files.isReadable(p) && Files.isRegularFile(p) && !Files.isHidden(p)) {
                    files.add(p);
                }
            }
        } catch (IOException ignored) {
        }
        return files;
    }

    /** Returns if the given directory is empty **/
    private boolean isDirEmpty(final Path directory) throws IOException {
        try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(directory)) {
            return !dirStream.iterator().hasNext();
        }
    }

}