edu.kit.dama.dataworkflow.util.DataWorkflowHelper.java Source code

Introduction

Here is the source code for edu.kit.dama.dataworkflow.util.DataWorkflowHelper.java
Source

/*
 * Copyright 2015 Karlsruhe Institute of Technology.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.kit.dama.dataworkflow.util;

import edu.kit.dama.authorization.entities.GroupId;
import edu.kit.dama.authorization.entities.IAuthorizationContext;
import edu.kit.dama.authorization.entities.Role;
import edu.kit.dama.authorization.entities.UserId;
import edu.kit.dama.authorization.entities.impl.AuthorizationContext;
import edu.kit.dama.authorization.exceptions.UnauthorizedAccessAttemptException;
import edu.kit.dama.commons.types.DigitalObjectId;
import edu.kit.dama.mdm.base.UserData;
import edu.kit.dama.mdm.core.IMetaDataManager;
import edu.kit.dama.mdm.core.MetaDataManagement;
import edu.kit.dama.mdm.dataorganization.entity.core.IDataOrganizationNode;
import edu.kit.dama.mdm.dataorganization.entity.core.IFileTree;
import edu.kit.dama.mdm.dataorganization.service.core.DataOrganizationServiceLocal;
import edu.kit.dama.mdm.dataorganization.service.exception.EntityNotFoundException;
import edu.kit.dama.staging.ap.AbstractStagingAccessPoint;
import edu.kit.dama.staging.entities.StagingAccessPointConfiguration;
import edu.kit.dama.staging.entities.TransferClientProperties;
import edu.kit.dama.staging.entities.download.DOWNLOAD_STATUS;
import edu.kit.dama.staging.entities.download.DownloadInformation;
import edu.kit.dama.staging.exceptions.TransferPreparationException;
import edu.kit.dama.staging.services.impl.download.DownloadInformationServiceLocal;
import edu.kit.dama.staging.util.StagingConfigurationManager;
import edu.kit.dama.util.SystemUtils;
import edu.kit.dama.mdm.dataworkflow.ExecutionEnvironmentConfiguration;
import edu.kit.dama.mdm.dataworkflow.DataWorkflowTask;
import edu.kit.dama.dataworkflow.exceptions.StagingPreparationException;
import edu.kit.dama.dataworkflow.exceptions.UnsupportedOperatingSystemException;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 *
 * @author mf6319
 */
public final class DataWorkflowHelper {

    private static final Logger LOGGER = LoggerFactory.getLogger(DataWorkflowHelper.class);
    /**
     * Status codes that are neither an error state nor a finished state.
     */
    private static final List<DataWorkflowTask.TASK_STATUS> FILTERED_STATUS_CODES;
    /**
     * All status codes declared by DataWorkflowTask.TASK_STATUS.
     */
    private static final List<DataWorkflowTask.TASK_STATUS> UNFILTERED_STATUS_CODES;

    static {
        //collect both lists locally and publish them once they are complete
        final List<DataWorkflowTask.TASK_STATUS> activeStates = new ArrayList<>();
        final List<DataWorkflowTask.TASK_STATUS> allStates = new ArrayList<>();
        for (DataWorkflowTask.TASK_STATUS status : DataWorkflowTask.TASK_STATUS.values()) {
            allStates.add(status);
            boolean terminated = DataWorkflowTask.TASK_STATUS.isErrorState(status)
                    || DataWorkflowTask.TASK_STATUS.isFinishedState(status);
            if (!terminated) {
                activeStates.add(status);
            }
        }
        FILTERED_STATUS_CODES = activeStates;
        UNFILTERED_STATUS_CODES = allStates;
    }
    //Names of the sub-directories located below the task base path.
    //Name of the input data directory, see getTaskInputDirectory().
    public final static String DATA_IN_DIR = "data_in";
    //Name of the output data directory, see getTaskOutputDirectory().
    public final static String DATA_OUT_DIR = "data_out";
    //Name of the working directory, see getTaskWorkingDirectory().
    public final static String WORKING_DIR = "working";
    //Name of the directory for temporary files, see getTaskTempDirectory().
    public final static String TEMP_DIR = "temp";

    //Placeholders that are replaced by the according local directory paths
    //during variable substitution, see performSubstitution().
    public static final String DATA_IN_DIR_VARIABLE = "${data.input.dir}";
    public static final String DATA_OUT_DIR_VARIABLE = "${data.output.dir}";
    public static final String TEMP_DIR_VARIABLE = "${temp.dir}";
    public static final String WORKING_DIR_VARIABLE = "${working.dir}";

    /**
     * Obtain the base path for data access for the provided task from the
     * repository perspective, which means, that the local base path of the
     * staging access point associated with the execution environment of the
     * task will be the first part of this base path. The base path will
     * contain directories for input and output data, temporary files and the
     * working directory containing the user application. The base path
     * consists of the following parts:
     * <ul>
     * <li>The local base path of the access point configuration referenced by
     * the execution environment of pTask</li>
     * <li>getTaskContext(pTask).getUserId().getStringRepresentation()</li>
     * <li>pTask.getId().toString()</li>
     * </ul>
     *
     * For obtaining the base path from the perspective of the execution
     * environment the method
     * {@link #getExecutionBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)}
     * must be used.
     *
     * @param pTask The task for which the base path should be obtained.
     *
     * @return The staging base path as file.
     *
     * @throws IOException if the task has no execution environment or if no
     * access point configuration is found for the access point id of the
     * execution environment.
     */
    public static File getStagingBasePath(DataWorkflowTask pTask) throws IOException {
        LOGGER.debug("Obtaining staging path for task with id {}", pTask.getId());
        ExecutionEnvironmentConfiguration env = pTask.getExecutionEnvironment();
        //check the environment BEFORE accessing it, otherwise a missing environment results in a NullPointerException instead of the documented IOException
        if (env == null) {
            throw new IOException("Failed to obtain execution environment. No staging path available.");
        }
        LOGGER.debug("Used execution environment has id {}", env.getId());
        String accessPointId = env.getStagingAccessPointId();
        LOGGER.debug("Using access point with id {}", accessPointId);
        StagingAccessPointConfiguration accessPointConfig = StagingConfigurationManager.getSingleton()
                .getAccessPointConfigurationById(accessPointId);
        if (accessPointConfig == null) {
            throw new IOException("Failed to obtain access point configuration. No staging path available.");
        }
        LOGGER.debug("Obtained access point configuration.");
        //Get local base path of access point. This is the repository-accessible path that will be available for the task at execution time
        //under env.getAccessPointLocalBasePath()
        //For the moment we use this path to setup our task-specific folder structure:
        //<BASE_PATH>/<USER_ID>/<JOB_ID>/DATA_IN, DATA_OUT, WORKING, TEMP
        String localBasePath = accessPointConfig.getLocalBasePath();
        LOGGER.debug("Obtained local base path {}", localBasePath);
        //obtain task base path <BASE_PATH>/<USER_ID>/<JOB_ID>/
        return new File(localBasePath, getTaskContext(pTask).getUserId().getStringRepresentation() + File.separator
                + pTask.getId().toString() + File.separator);
    }

    /**
     * FileFilter accepting every regular file of a folder except the marker
     * file 'dataworkflow_substitution'. Directories are rejected.
     */
    private final static FileFilter VAR_FILTER = new FileFilter() {
        @Override
        public boolean accept(File candidate) {
            if (!candidate.isFile()) {
                return false;
            }
            return !"dataworkflow_substitution".equals(candidate.getName());
        }
    };

    /**
     * Get the base path of the provided task as it is seen from within the
     * execution environment. The path is composed the same way as the result
     * of {@link #getStagingBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * }, but starts with the access point local base path configured at the
     * execution environment.
     *
     * @param pTask The task for which the base path should be obtained.
     *
     * @return The execution base path as file.
     */
    public static File getExecutionBasePath(DataWorkflowTask pTask) {
        //The local base path of the execution environment might be a non-repository-accessible path that will be available for the task at execution time.
        //Below this path the following schema is used:
        //<BASE_PATH>/<USER_ID>/<JOB_ID>/DATA_IN, DATA_OUT, WORKING, TEMP
        final String environmentBasePath = pTask.getExecutionEnvironment().getAccessPointLocalBasePath();
        //relative part of the task base path: <USER_ID>/<JOB_ID>/
        final String userSegment = getTaskContext(pTask).getUserId().getStringRepresentation();
        final String taskSegment = pTask.getId().toString();
        final String relativeTaskPath = userSegment + File.separator + taskSegment + File.separator;
        return new File(environmentBasePath, relativeTaskPath);
    }

    /**
     * Resolve the input data directory of a task, which is the child named
     * {@link #DATA_IN_DIR} of the provided task base path.
     *
     * @param pTaskBasePath The task base path, e.g. obtained via {@link #getStagingBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * } or {@link #getExecutionBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * }.
     *
     * @return The task input directory.
     */
    public static File getTaskInputDirectory(File pTaskBasePath) {
        final File inputDirectory = new File(pTaskBasePath, DATA_IN_DIR);
        return inputDirectory;
    }

    /**
     * Resolve the output data directory of a task, which is the child named
     * {@link #DATA_OUT_DIR} of the provided task base path.
     *
     * @param pTaskBasePath The task base path, e.g. obtained via {@link #getStagingBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * } or {@link #getExecutionBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * }.
     *
     * @return The task output directory.
     */
    public static File getTaskOutputDirectory(File pTaskBasePath) {
        final File outputDirectory = new File(pTaskBasePath, DATA_OUT_DIR);
        return outputDirectory;
    }

    /**
     * Resolve the directory for temporary files of a task, which is the child
     * named {@link #TEMP_DIR} of the provided task base path.
     *
     * @param pTaskBasePath The task base path, e.g. obtained via {@link #getStagingBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * } or {@link #getExecutionBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * }.
     *
     * @return The task temp directory.
     */
    public static File getTaskTempDirectory(File pTaskBasePath) {
        final File tempDirectory = new File(pTaskBasePath, TEMP_DIR);
        return tempDirectory;
    }

    /**
     * Resolve the working directory of a task, which is the child named
     * {@link #WORKING_DIR} of the provided task base path.
     *
     * @param pTaskBasePath The task base path, e.g. obtained via {@link #getStagingBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * } or {@link #getExecutionBasePath(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * }.
     *
     * @return The task working directory.
     */
    public static File getTaskWorkingDirectory(File pTaskBasePath) {
        final File workingDirectory = new File(pTaskBasePath, WORKING_DIR);
        return workingDirectory;
    }

    /**
     * Obtain the contact entity for the provided task. The entity is queried
     * by the distinguished name that equals the user id of the task context
     * (see {@link #getTaskContext(edu.kit.dama.mdm.dataworkflow.DataWorkflowTask)
     * }). If the query is not authorized or yields no entity,
     * UserData.WORLD_USER will be returned, so the result is never the raw
     * 'null' result of the query.
     *
     * @param pTask The task for which to obtain the contact information.
     *
     * @return The contact entity or UserData.WORLD_USER.
     */
    public static UserData getContact(DataWorkflowTask pTask) {
        //default value that is returned if no user data can be obtained
        UserData result = UserData.WORLD_USER;

        //build the context first, so that a failure cannot leave an unclosed metadata manager behind
        IAuthorizationContext ctx = getTaskContext(pTask);
        IMetaDataManager mdm = MetaDataManagement.getMetaDataManagement().getMetaDataManager();
        try {
            mdm.setAuthorizationContext(ctx);
            LOGGER.debug("Getting contact information for task with id {}", pTask.getId());
            //NOTE(review): the user id is concatenated into the query string. If IMetaDataManager offers a
            //parameterized variant of findSingleResult() it should be preferred here -- TODO confirm.
            UserData contact = mdm.findSingleResult("SELECT o FROM UserData o WHERE o.distinguishedName = \""
                    + ctx.getUserId().getStringRepresentation() + "\"", UserData.class);
            if (contact != null) {
                result = contact;
            } else {
                //callers dereference the result (e.g. to obtain the mail address), therefore never hand out 'null'
                LOGGER.warn("No user data found for user with id {}. Returning UserData.WORLD_USER",
                        ctx.getUserId());
            }
        } catch (UnauthorizedAccessAttemptException ex) {
            LOGGER.warn("Failed to access user data for user with id " + ctx.getUserId()
                    + ". Returning UserData.WORLD_USER", ex);
        } finally {
            mdm.close();
        }
        return result;
    }

    /**
     * Schedule the staging process(es) for the data needed by the provided
     * task. The process contains the following points:
     * <ul>
     * <li>Creation of the task base path including data_in, data_out, working
     * and temp directories.</li>
     * <li>Obtaining selected data organization views of digital objects to
     * stage.</li>
     * <li>Schedule downloads for the content of each digital object.</li>
     * <li>Create symbolic links of the first-level content (all nodes directly
     * below 'root') of each view into 'data_in'. Existing files will be
     * skipped.</li>
     * </ul>
     * As soon as the staging for all digital objects is done, the symbolic
     * links in the data_in directory should point the valid data located inside
     * the different staging locations.
     *
     * @param pTask The task for which the staging should be scheduled.
     *
     * @return A properties object containing the object-transferId mapping.
     * This mapping should be stored in the DataWorkflowTask in order to be able
     * to check the data staging process.
     *
     * @throws StagingPreparationException if the task directories cannot be
     * created, the object-view map cannot be deserialized, the staging access
     * point is not available or scheduling a download or creating a link
     * fails.
     */
    public static Properties scheduleStaging(DataWorkflowTask pTask) throws StagingPreparationException {
        Properties objectDownloadMap = new Properties();
        IAuthorizationContext ctx = getTaskContext(pTask);
        File taskBasePath;
        try {
            taskBasePath = getStagingBasePath(pTask);
        } catch (IOException ex) {
            throw new StagingPreparationException("Staging preparation failed.", ex);
        }
        LOGGER.debug("Checking task base path {}", taskBasePath);
        if (taskBasePath.exists()) {
            LOGGER.debug("Task base path at " + taskBasePath + " already exists.");
        } else {
            LOGGER.debug("Task base path does not exist. Creating directory strucute {}", taskBasePath);
            if (!taskBasePath.mkdirs()) {
                throw new StagingPreparationException("Failed to create task base path at " + taskBasePath);
            }
            LOGGER.debug("Task base path structure successfully created.");
        }

        File inputDir = getTaskInputDirectory(taskBasePath);
        File outputDir = getTaskOutputDirectory(taskBasePath);
        File tempDir = getTaskTempDirectory(taskBasePath);
        File workingDir = getTaskWorkingDirectory(taskBasePath);
        LOGGER.debug("Creating directories:");
        LOGGER.debug(" - Input: {}", inputDir);
        LOGGER.debug(" - Output: {}", outputDir);
        LOGGER.debug(" - Working: {}", workingDir);
        LOGGER.debug(" - Temp: {}", tempDir);
        //actually create the task directories. Without this step the symbolic links created below would point into a non-existing input directory.
        for (File taskDir : new File[]{inputDir, outputDir, workingDir, tempDir}) {
            //mkdirs() also returns 'false' for existing directories, therefore check for existence first
            if (!taskDir.isDirectory() && !taskDir.mkdirs()) {
                throw new StagingPreparationException("Failed to create task directory at " + taskDir);
            }
        }

        LOGGER.debug("Obtaining object-view list for DataWorkflow task {}", pTask.getId());
        Properties objectViewMap;

        try {
            objectViewMap = pTask.getObjectViewMapAsObject();
        } catch (IOException ex) {
            //keep the cause to be able to find out why the deserialization has failed
            throw new StagingPreparationException(
                    "Failed to deserialize object-view list from task " + pTask.getId(), ex);
        }

        try {
            TransferClientProperties props = new TransferClientProperties();
            String accessPointId = pTask.getExecutionEnvironment().getStagingAccessPointId();
            AbstractStagingAccessPoint accessPoint = StagingConfigurationManager.getSingleton()
                    .getAccessPointById(accessPointId);
            if (accessPoint == null) {
                //fail early with a clear message instead of running into a NullPointerException during link creation
                throw new StagingPreparationException(
                        "No staging access point found for id " + accessPointId + ". Unable to schedule staging for task "
                        + pTask.getId());
            }
            LOGGER.debug("Adding staging acccess point id {} to TransferClientProperties.", accessPointId);
            props.setStagingAccessPointId(accessPointId);
            String mail = getContact(pTask).getEmail();
            LOGGER.debug("Adding contact mail {} to TransferClientProperties.", mail);
            props.setReceiverMail(mail);
            Set<Entry<Object, Object>> entries = objectViewMap.entrySet();

            LOGGER.debug("Scheduling download for {} objects in object-view map", entries.size());
            for (Entry<Object, Object> entry : entries) {
                //key: digital object id, value: id of the data organization view to stage
                String objectId = (String) entry.getKey();
                String viewId = (String) entry.getValue();
                DigitalObjectId doid = new DigitalObjectId(objectId);
                IFileTree tree = DataOrganizationServiceLocal.getSingleton().loadFileTree(doid, viewId, ctx);
                LOGGER.debug("Scheduling download for object {} and view {}", objectId, viewId);
                DownloadInformation downloadInfo = DownloadInformationServiceLocal.getSingleton()
                        .scheduleDownload(doid, tree, props, ctx);
                LOGGER.debug("Putting transfer id {} for object {} to object-transfer list.", downloadInfo.getId(),
                        objectId);
                objectDownloadMap.put(doid.getStringRepresentation(), Long.toString(downloadInfo.getId()));

                //link all nodes located directly below the root node into the input directory
                List<? extends IDataOrganizationNode> firstLevelNodes = tree.getRootNode().getChildren();
                LOGGER.debug("Creating links for {} first level data organization nodes", firstLevelNodes.size());
                for (IDataOrganizationNode node : firstLevelNodes) {
                    File linkedFile = new File(inputDir + File.separator + node.getName());
                    LOGGER.debug("Creating link for file {}", linkedFile);
                    if (linkedFile.exists()) {
                        LOGGER.error("File link " + linkedFile
                                + " already exists. Skipping link creation but processing might fail.");
                    } else {
                        LOGGER.debug("Obtaining data path from download information");
                        File dataPath = accessPoint.getLocalPathForUrl(downloadInfo.getDataFolderUrl(), ctx);
                        LOGGER.debug("Obtained data path is '{}'. Creating symbolic link to input dir at '{}'",
                                dataPath, linkedFile);
                        SystemUtils.createSymbolicLink(new File(dataPath, node.getName()), linkedFile);
                        LOGGER.debug("Link successfully created.");
                    }
                }
                LOGGER.debug("Staging of object {} for task {} successfully scheduled.", objectId, pTask.getId());
            }
            LOGGER.debug("Scheduling of all objects for task {} successfully finished.", pTask.getId());
        } catch (IOException | EntityNotFoundException | TransferPreparationException ex) {
            //Failed to create link/view not found/transfer preparation has failed
            throw new StagingPreparationException(
                    "Failed to prepare task directory structure for task " + pTask.getId(), ex);
        }
        return objectDownloadMap;
    }

    /**
     * Abort the staging process for all input objects of the provided task. The
     * associated download entities will be set to DOWNLOAD_REMOVED. Aborting is
     * done on a best-effort basis: entries of the object-transfer map holding
     * no valid download id are logged and skipped, the remaining downloads are
     * still aborted. If the object-transfer map cannot be obtained, an error
     * is logged and nothing is aborted.
     *
     * @param pTask The task for which the staging should be aborted.
     */
    public static void abortStaging(DataWorkflowTask pTask) {
        try {
            //update status
            Properties dataMap = pTask.getObjectTransferMapAsObject();
            Set<Entry<Object, Object>> entries = dataMap.entrySet();
            IAuthorizationContext ctx = DataWorkflowHelper.getTaskContext(pTask);
            for (Entry<Object, Object> entry : entries) {
                String objectId = (String) entry.getKey();
                Long transferId;
                try {
                    transferId = Long.valueOf((String) entry.getValue());
                } catch (NumberFormatException ex) {
                    //a single broken entry must not prevent the remaining downloads from being aborted
                    LOGGER.warn("Invalid download id '" + entry.getValue() + "' for object " + objectId
                            + ". Skipping entry.", ex);
                    continue;
                }
                LOGGER.debug("Checking download status for object {} with download id {}", objectId, transferId);
                //a return value other than 1 is treated as 'not properly aborted'
                if (DownloadInformationServiceLocal.getSingleton().updateStatus(transferId,
                        DOWNLOAD_STATUS.DOWNLOAD_REMOVED.getId(), "Download aborted.", ctx) != 1) {
                    LOGGER.warn("Download with id {} was not properly aborted.", transferId);
                } else {
                    LOGGER.debug("Download with id {} aborted.", transferId);
                }
            }
        } catch (IOException ex) {
            LOGGER.error("Failed to obtain object-transfer map. Unable to abort staging.", ex);
        }
    }

    /**
     * Determine the main executable of the user application of the provided
     * task. The executable is expected inside the task working directory below
     * the execution base path and is named 'run.bat' if Windows is detected or
     * 'run.sh' if Unix or MacOSX is detected. If none of these operating
     * systems is detected or if the expected start script does not exist, an
     * {@link UnsupportedOperatingSystemException} is thrown. In the second
     * case the user application is not supporting the operating system of the
     * execution environment.
     *
     * @param pTask The task for which the main executable should be determined.
     *
     * @return The file location of the main executable within the execution
     * environment.
     *
     * @throws UnsupportedOperatingSystemException If the operating system is
     * not detected as Windows, Unix or MacOSX or if the user application of
     * pTask does not provide an executable for this operating system.
     */
    public static File getTaskMainExecutable(DataWorkflowTask pTask) throws UnsupportedOperatingSystemException {
        final File workingDir = getTaskWorkingDirectory(getExecutionBasePath(pTask));

        //select the start script name depending on the detected operating system
        final String scriptName;
        if (org.apache.commons.lang3.SystemUtils.IS_OS_WINDOWS) {
            scriptName = "run.bat";
        } else if (org.apache.commons.lang3.SystemUtils.IS_OS_UNIX
                || org.apache.commons.lang3.SystemUtils.IS_OS_MAC_OSX) {
            scriptName = "run.sh";
        } else {
            scriptName = null;
        }
        final File mainExecutable = (scriptName == null) ? null : new File(workingDir, scriptName);

        LOGGER.info("Main executable is: {}", mainExecutable);
        if (mainExecutable == null) {
            //no script name selected, OS not supported.
            throw new UnsupportedOperatingSystemException(
                    "Your system was not detected to be Windows, Unix or MacOSX. Unable to continue.");
        }
        if (!mainExecutable.exists()) {
            //start script is missing, application support for OS not available.
            throw new UnsupportedOperatingSystemException("The user application of task with the id "
                    + pTask.getId() + " is not supporting your operating system. Unable to continue.");
        }
        //at this point the main executable is not null and exists
        return mainExecutable;
    }

    /**
     * Substitute DataWorkflow variables (e.g. working-, temp-, input- and
     * output-directory) in files of the provided task. The files located
     * directly inside the task working directory are processed in every case.
     * Afterwards, the staging base path of pTask is traversed recursively and
     * substitution is applied to all files of each directory containing a
     * file named 'dataworkflow_substitution'. Directories without this file
     * are skipped.
     *
     * @param pTask The task whose directories should be checked for
     * substitution.
     *
     * @throws IOException If the replacement operation fails for some reason.
     * @throws URISyntaxException If any of the URLs in the task (input, output,
     * temp or working dir URL) is invalid.
     */
    public static void substituteVariablesInDirectory(DataWorkflowTask pTask)
            throws IOException, URISyntaxException {
        //step 1: unconditional substitution in the working directory
        LOGGER.debug("Obtaining task working directory for variable substitution.");
        final File stagingBase = getStagingBasePath(pTask);
        final File workingDir = getTaskWorkingDirectory(stagingBase);
        LOGGER.debug("Performing substitution in working directory {}.", workingDir);
        performSubstitution(pTask, workingDir);

        //step 2: marker-based substitution, starting at the staging base path
        LOGGER.debug("Performing substitution in sub-directories of working directory {}.", workingDir);
        final File recursionRoot = getStagingBasePath(pTask);
        substituteVariablesRecursive(pTask, recursionRoot);
        LOGGER.debug("Variable substitution finished.");
    }

    /**
     * Internal method for recursive substitution within pTargetPath. All
     * sub-directories of pTargetPath are visited recursively. If pTargetPath
     * contains a file named 'dataworkflow_substitution', the variables in all
     * files of pTargetPath are substituted. If pTargetPath is 'null', does not
     * exist or its content cannot be listed, a warning is logged and the path
     * is skipped.
     *
     * @param pTask The task whose working directory should be checked for
     * substitution.
     * @param pTargetPath The target path.
     *
     * @throws IOException If the replacement operation fails for some reason.
     * @throws URISyntaxException If any of the URLs in the task (input, output,
     * temp or working dir URL) is invalid.
     */
    private static void substituteVariablesRecursive(DataWorkflowTask pTask, File pTargetPath)
            throws IOException, URISyntaxException {
        if (pTargetPath == null || !pTargetPath.exists()) {
            LOGGER.warn("Argument pTargetPath must not be 'null' and must exist");
            return;
        }
        LOGGER.info("Checking directory '" + pTargetPath.getPath() + "'");

        //get a list of relevant files
        File[] relevantFileList = pTargetPath.listFiles(new FileFilter() {
            @Override
            public boolean accept(File pathname) {
                //accept a file only if it is a directory or
                //if it is a file called "dataworkflow_substitution", which marks directories that should be affected by replacements
                return (pathname.isDirectory()
                        || (pathname.isFile() && pathname.getName().equals("dataworkflow_substitution")));
            }
        });

        if (relevantFileList == null) {
            //listFiles() returns 'null' if the path is no directory or if an I/O error occurs
            LOGGER.warn("Unable to list the content of '{}'. Skipping variable substitution for this path.",
                    pTargetPath);
            return;
        }

        //go through all listed files within pTargetPath
        for (File relevantFile : relevantFileList) {
            if (relevantFile.isDirectory()) {
                //continue recursively
                substituteVariablesRecursive(pTask, relevantFile);
            } else {
                //due to the filtering we have now a marker file. Therefore we have to replace variables in all files within its parent directory.
                performSubstitution(pTask, relevantFile.getParentFile());
            }
        }
    }

    /**
     * Helper method to perform the actual substitution. All files located
     * directly inside pDirectory, except the marker file
     * 'dataworkflow_substitution', are read, the variables
     * {@link #DATA_IN_DIR_VARIABLE}, {@link #DATA_OUT_DIR_VARIABLE},
     * {@link #TEMP_DIR_VARIABLE} and {@link #WORKING_DIR_VARIABLE} are
     * replaced by the canonical local paths of the according task directory
     * URLs and the result is written back to the same file. Files larger than
     * 10 MB are skipped. If the content of pDirectory cannot be listed, a
     * warning is logged and nothing is substituted.
     *
     * @param pTask The task providing the directory URLs and the staging
     * access point used to resolve them.
     * @param pDirectory The directory whose files should be processed.
     *
     * @throws IOException If the replacement operation fails for some reason,
     * e.g. if no staging access point is available or a file cannot be read or
     * written.
     * @throws URISyntaxException If any of the URLs in the task (input, output,
     * temp or working dir URL) is invalid.
     */
    private static void performSubstitution(DataWorkflowTask pTask, File pDirectory)
            throws IOException, URISyntaxException {
        File[] relevantFileList = pDirectory.listFiles(VAR_FILTER);
        if (relevantFileList == null) {
            //listFiles() returns 'null' if pDirectory is no directory or if an I/O error occurs
            LOGGER.warn("Unable to list files in directory {}. Variable substitution is skipped.", pDirectory);
            return;
        }
        LOGGER.info("Substituting variables in " + relevantFileList.length
                + ((relevantFileList.length == 1) ? " file" : " files"));

        for (File f : relevantFileList) {
            if (f.length() > 10 * FileUtils.ONE_MB) {
                LOGGER.warn(
                        "File {} has a size of {} bytes. Variable substitution is only supported for files with less than 10MB. File is skipped.",
                        f, f.length());
                continue;
            }
            //perform replacement
            LOGGER.info(" * Substituting variables in file '" + f.getPath() + "'");

            LOGGER.info("   - Reading input file");
            byte[] data = new byte[(int) f.length()];
            //the input stream is closed before the file is opened again for writing
            try (DataInputStream din = new DataInputStream(new FileInputStream(f))) {
                din.readFully(data);
            }

            LOGGER.info("   - Substituting variables");
            //content is decoded and encoded using the platform default charset
            String dataString = new String(data);

            String accessPointId = pTask.getExecutionEnvironment().getStagingAccessPointId();
            AbstractStagingAccessPoint accessPoint = StagingConfigurationManager.getSingleton()
                    .getAccessPointById(accessPointId);
            if (accessPoint == null) {
                throw new IOException("No staging access point found for id " + accessPointId
                        + ". Unable to substitute variables.");
            }
            IAuthorizationContext ctx = getTaskContext(pTask);

            LOGGER.debug("  - Obtaining local path for input dir URL {}", pTask.getInputDirectoryUrl());
            File localPath = accessPoint.getLocalPathForUrl(new URL(pTask.getInputDirectoryUrl()), ctx);
            LOGGER.debug("  - Local path is: {}", localPath);
            String inputDirReplacement = localPath.getCanonicalPath();

            LOGGER.debug("  - Obtaining local path for output dir URL {}", pTask.getOutputDirectoryUrl());
            localPath = accessPoint.getLocalPathForUrl(new URL(pTask.getOutputDirectoryUrl()), ctx);
            String outputDirReplacement = localPath.getCanonicalPath();

            LOGGER.debug("  - Obtaining local path for working dir URL {}", pTask.getWorkingDirectoryUrl());
            localPath = accessPoint.getLocalPathForUrl(new URL(pTask.getWorkingDirectoryUrl()), ctx);
            String workingDirReplacement = localPath.getCanonicalPath();

            LOGGER.debug("  - Obtaining local path for temp dir URL {}", pTask.getTempDirectoryUrl());
            localPath = accessPoint.getLocalPathForUrl(new URL(pTask.getTempDirectoryUrl()), ctx);
            String tempDirReplacement = localPath.getCanonicalPath();

            LOGGER.info("     " + DATA_IN_DIR + ": " + inputDirReplacement);
            LOGGER.info("     " + DATA_OUT_DIR + ": " + outputDirReplacement);
            LOGGER.info("     " + TEMP_DIR + ": " + tempDirReplacement);
            LOGGER.info("     " + WORKING_DIR + ": " + workingDirReplacement);
            //replace all variables by the canonical local paths.
            //String.replace() is used as it treats both arguments literally. With replaceAll() backslashes and
            //dollar signs inside the replacement (e.g. in Windows paths like C:\data\in) would be interpreted
            //as escape characters or group references, which results in corrupted paths or exceptions.
            dataString = dataString.replace(DATA_IN_DIR_VARIABLE, inputDirReplacement)
                    .replace(DATA_OUT_DIR_VARIABLE, outputDirReplacement)
                    .replace(TEMP_DIR_VARIABLE, tempDirReplacement)
                    .replace(WORKING_DIR_VARIABLE, workingDirReplacement);

            LOGGER.info("   - Writing output file");
            //errors while closing the output stream are not swallowed as they may indicate incompletely written data
            try (FileOutputStream fout = new FileOutputStream(f)) {
                fout.write(dataString.getBytes());
                fout.flush();
            }
            LOGGER.info(" * Substituting operations finished successfully");
        }
        LOGGER.info("Directory {} processed successfully", pDirectory);
    }

    /**
     * Build the authorization context used to execute the provided task. The
     * UserId of the context is created from {@link DataWorkflowTask#getExecutorId()
     * }, the GroupId from {@link DataWorkflowTask#getExecutorGroupId()
     * } and the role is always Role.MEMBER.
     *
     * @param pTask The task for which the context should be obtained.
     *
     * @return The AuthorizationContext for pTask.
     */
    public static IAuthorizationContext getTaskContext(DataWorkflowTask pTask) {
        final UserId executor = new UserId(pTask.getExecutorId());
        final GroupId executorGroup = new GroupId(pTask.getExecutorGroupId());
        return new AuthorizationContext(executor, executorGroup, Role.MEMBER);
    }

    /**
     * Query the DataWorkflowTasks that have to be processed. The query is
     * executed using the system authorization context. The result is always
     * sorted ascending by the last update of the tasks and restricted to a
     * list of status codes selected via <i>pFilter</i>.
     *
     * @param pTaskIds The ids of the tasks to select. If this argument is null
     * or empty, tasks are selected by status only. If ids are provided, the
     * result may still contain fewer tasks than ids because the status
     * restriction is applied as well.
     * @param pMaxResults The max. number of tasks that will be returned. This
     * argument is only applied if no ids are provided via <i>pTaskIds</i>.
     * Otherwise, it is ignored and the size of <i>pTaskIds</i> is the max.
     * number of results.
     * @param pFilter If TRUE, only tasks in one of the filtered status codes
     * are selected, i.e. active tasks that are not in a finished or failed
     * state. If FALSE, the unfiltered status codes are used.
     *
     * @return The list of DataWorkflow tasks.
     *
     * @throws UnauthorizedAccessAttemptException If one or more tasks could not
     * be accessed.
     */
    public static List<DataWorkflowTask> getDataWorkflowTasks(List<Long> pTaskIds, int pMaxResults, boolean pFilter)
            throws UnauthorizedAccessAttemptException {
        IMetaDataManager manager = MetaDataManagement.getMetaDataManagement().getMetaDataManager();
        try {
            manager.setAuthorizationContext(AuthorizationContext.factorySystemContext());

            List<DataWorkflowTask.TASK_STATUS> acceptedStatus;
            if (pFilter) {
                acceptedStatus = FILTERED_STATUS_CODES;
            } else {
                acceptedStatus = UNFILTERED_STATUS_CODES;
            }

            boolean idsProvided = pTaskIds != null && !pTaskIds.isEmpty();
            if (idsProvided) {
                //explicit ids given: select by id and status, pMaxResults is not applied
                LOGGER.debug("Obtaining DataWorkflow tasks for ids {}.", pTaskIds);
                return manager.findResultList(
                        "SELECT t FROM DataWorkflowTask t WHERE t.id IN :1 AND t.status IN :2 ORDER BY t.lastUpdate ASC",
                        new Object[] { pTaskIds, acceptedStatus }, DataWorkflowTask.class);
            }

            //no ids given: select by status only, limited to pMaxResults entries
            LOGGER.debug("Obtaining all DataWorkflow tasks.");
            return manager.findResultList(
                    "SELECT t FROM DataWorkflowTask t WHERE t.status IN :1 ORDER BY t.lastUpdate ASC",
                    new Object[] { acceptedStatus }, DataWorkflowTask.class, 0, pMaxResults);
        } finally {
            //release the metadata manager no matter whether the query succeeded or not
            manager.close();
        }
    }

    /**
     * Check whether the provided ExecutionEnvironment is capable of executing
     * another task. Therefore, the database is queried using the system
     * authorization context for the number of DataWorkflow tasks in status
     * PROCESSING that are assigned to the provided ExecutionEnvironment. This
     * number is compared to the max. number of parallel tasks configured for
     * the ExecutionEnvironment. If another task can be scheduled, TRUE is
     * returned. Otherwise, this method returns FALSE and the submission has to
     * be postponed.
     *
     * @param pConfiguration The configuration of the ExecutionEnvironment that
     * will be checked for running tasks.
     *
     * @return TRUE if the provided ExecutionEnvironment can take another task.
     *
     * @throws UnauthorizedAccessAttemptException If the query to the database
     * failed.
     */
    public static boolean canScheduleTask(ExecutionEnvironmentConfiguration pConfiguration)
            throws UnauthorizedAccessAttemptException {
        IMetaDataManager mdm = MetaDataManagement.getMetaDataManagement().getMetaDataManager();
        long cnt = 0;
        try {
            mdm.setAuthorizationContext(AuthorizationContext.factorySystemContext());
            LOGGER.debug("Obtaining all DataWorkflow tasks running within execution environment {}.",
                    pConfiguration.getId());
            List<DataWorkflowTask.TASK_STATUS> processing = Arrays.asList(DataWorkflowTask.TASK_STATUS.PROCESSING);
            //The environment id is bound as query parameter instead of being concatenated into the
            //query string. This keeps the query text constant and is consistent with the way all
            //other arguments of the queries in this class are provided.
            cnt = mdm.findSingleResult(
                    "SELECT COUNT(t) FROM DataWorkflowTask t WHERE t.status IN :1 AND t.executionEnvironment.id=:2",
                    new Object[] { processing, pConfiguration.getId() }, Long.class);
            LOGGER.debug(
                    "Found {} running task(s). Comparing with max. number of tasks supported by ExecutionEnvironment ({}).",
                    cnt, pConfiguration.getMaxParallelTasks());
        } finally {
            //release the metadata manager no matter whether the query succeeded or not
            mdm.close();
        }
        //another task fits only if the number of running tasks is below the configured maximum
        return pConfiguration.getMaxParallelTasks() > cnt;
    }
}