cross.datastructures.pipeline.ResultAwareCommandPipeline.java Source code

Introduction

Here is the source code for cross.datastructures.pipeline.ResultAwareCommandPipeline.java

Source

/* Cross, common runtime object support system.
 * Copyright (C) 2008-2012, The authors of Cross. All rights reserved.
 *
 * Project website: http://maltcms.sf.net
 *
 * Cross may be used under the terms of either the
 *
 * GNU Lesser General Public License (LGPL)
 * http://www.gnu.org/licenses/lgpl.html
 *
 * or the
 *
 * Eclipse Public License (EPL)
 * http://www.eclipse.org/org/documents/epl-v10.php
 *
 * As a user/recipient of Cross, you may choose which license to receive the code
 * under. Certain files or entire directories may not be covered by this
 * dual license, but are subject to licenses compatible to both LGPL and EPL.
 * License exceptions are explicitly declared in all relevant files or in a
 * LICENSE file in the relevant directories.
 *
 * Cross is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. Please consult the relevant license documentation
 * for details.
 */
package cross.datastructures.pipeline;

import cross.annotations.AnnotationInspector;
import cross.commands.fragments.IFragmentCommand;
import cross.datastructures.fragments.IFileFragment;
import cross.datastructures.tools.FileTools;
import cross.datastructures.tuple.TupleND;
import cross.datastructures.workflow.IWorkflow;
import cross.exception.ConstraintViolationException;
import cross.exception.ExitVmException;
import cross.tools.PublicMemberGetters;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.math.BigInteger;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import net.sf.mpaxs.api.ConfigurationKeys;
import net.sf.mpaxs.api.ExecutionType;
import net.sf.mpaxs.spi.concurrent.ComputeServerFactory;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.ConfigurationUtils;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.configuration.reloading.FileChangedReloadingStrategy;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.openide.util.lookup.ServiceProvider;

/**
 * Extension of CommandPipeline that skips re-execution of commands whose input
 * files and parameters are unchanged since the previous invocation, reusing
 * their existing results instead. File and parameter hashes are persisted in a
 * '.hashes' properties file below the workflow's output directory.
 *
 * @author Nils Hoffmann
 * @see ICommandSequence
 */
@Slf4j
@Data
@ServiceProvider(service = ICommandSequence.class, position = Integer.MIN_VALUE + 1)
public class ResultAwareCommandPipeline extends CommandPipeline {

    /**
     * Create a new command pipeline instance.
     */
    public ResultAwareCommandPipeline() {
    }

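    /**
     * Auto-saving store for file and parameter hashes, persisted in the
     * workflow's output directory and lazily initialized by
     * {@link #getHashes(cross.datastructures.workflow.IWorkflow)}.
     */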
    private PropertiesConfiguration hashes;

    /**
     * Resolves the given file fragments to their backing files on disk.
     *
     * @param inputFiles the input file fragments
     * @return the files corresponding to the given fragments
     */
    protected Collection<File> getInputFiles(TupleND<IFileFragment> inputFiles) {
        List<File> inputFileList = new ArrayList<>();
        for (IFileFragment f : inputFiles) {
            try {
                inputFileList.add(FileTools.getFile(f));
            } catch (IOException ioex) {
                throw new ConstraintViolationException(ioex);
            }
        }
        return inputFileList;
    }

    /**
     * Returns the hashes configuration for the given workflow, creating an
     * auto-saving, auto-reloading instance on first access.
     *
     * @param workflow the workflow whose hashes should be accessed
     * @return the hashes configuration
     */
    protected PropertiesConfiguration getHashes(IWorkflow workflow) {
        File hashesFile = getHashesFile(workflow);
        if (hashes == null) {
            try {
                hashes = new PropertiesConfiguration(hashesFile);
                hashes.setAutoSave(true);
                hashes.setReloadingStrategy(new FileChangedReloadingStrategy());
                log.debug("Hashes: {}", ConfigurationUtils.toString(hashes));
                return hashes;
            } catch (ConfigurationException ex) {
                log.error("Could not load configuration at " + hashesFile, ex);
                throw new ExitVmException(ex);
            }
        } else {
            return hashes;
        }
    }

    /**
     * Returns the '.hashes' file below the given workflow's output directory.
     *
     * @param workflow the workflow
     * @return the hashes file
     */
    protected File getHashesFile(IWorkflow workflow) {
        return new File(workflow.getOutputDirectory(), ".hashes");
    }

    /**
     * Stores the current file and parameter hashes for the given command.
     *
     * @param inputFiles the command's input file fragments
     * @param cmd        the fragment command
     */
    protected void updateHashes(TupleND<IFileFragment> inputFiles, IFragmentCommand cmd) {
        PropertiesConfiguration pc = getHashes(cmd.getWorkflow());
        Collection<File> files = getInputFiles(inputFiles);
        files.add(cmd.getWorkflow().getOutputDirectory(cmd));
        String fileHash = getRecursiveFileHash(files);
        String parametersHash = getParameterHash(cmd);
        pc.setProperty(getFileHashKey(cmd), fileHash);
        pc.setProperty(getParametersHashKey(cmd), parametersHash);
    }

    /**
     * Returns the property key under which the file hash of the given command
     * is stored.
     *
     * @param cmd the fragment command
     * @return the file hash property key
     */
    protected String getFileHashKey(IFragmentCommand cmd) {
        URI fragmentCommandOutputDirectory = cmd.getWorkflow().getOutputDirectory(cmd).getAbsoluteFile().toURI();
        URI relativeFile = FileTools.getRelativeUri(
                cmd.getWorkflow().getOutputDirectory().getAbsoluteFile().toURI(), fragmentCommandOutputDirectory);
        return cmd.getClass().getName() + "-" + relativeFile.getPath() + ".fileHash";
    }

    /**
     * Returns the property key under which the parameters hash of the given
     * command is stored.
     *
     * @param cmd the fragment command
     * @return the parameters hash property key
     */
    protected String getParametersHashKey(IFragmentCommand cmd) {
        URI fragmentCommandOutputDirectory = cmd.getWorkflow().getOutputDirectory(cmd).getAbsoluteFile().toURI();
        URI relativeFile = FileTools.getRelativeUri(
                cmd.getWorkflow().getOutputDirectory().getAbsoluteFile().toURI(), fragmentCommandOutputDirectory);
        return cmd.getClass().getName() + "-" + relativeFile.getPath() + ".parametersHash";
    }

    /**
     * Checks whether the workflow's input files are unchanged compared to the
     * previous invocation.
     *
     * @param inputFiles the workflow's input file fragments
     * @param workflow   the workflow
     * @return true if the input files are unchanged, false otherwise
     */
    protected boolean isInputUpToDate(TupleND<IFileFragment> inputFiles, IWorkflow workflow) {
        PropertiesConfiguration pc = getHashes(workflow);
        Collection<File> files = getInputFiles(inputFiles);
        String oldWorkflowInputFilesHash = pc.getString(workflow.getName() + ".inputFiles.fileHash", "");
        String workflowInputFilesHash = getRecursiveFileHash(files);
        return oldWorkflowInputFilesHash.equals(workflowInputFilesHash);
    }

    /**
     * Stores the current hash of the workflow's input files.
     *
     * @param inputFiles the workflow's input file fragments
     * @param workflow   the workflow
     */
    protected void updateWorkflowInputHashes(TupleND<IFileFragment> inputFiles, IWorkflow workflow) {
        PropertiesConfiguration pc = getHashes(workflow);
        Collection<File> files = getInputFiles(inputFiles);
        String fileHash = getRecursiveFileHash(files);
        pc.setProperty(workflow.getName() + ".inputFiles.fileHash", fileHash);
    }

    /**
     * Checks whether both the input/output files and the parameters of the
     * given command are unchanged compared to the previous invocation.
     *
     * @param inputFiles the command's input file fragments
     * @param cmd        the fragment command
     * @return true if files and parameters are unchanged, false otherwise
     */
    protected boolean isUpToDate(TupleND<IFileFragment> inputFiles, IFragmentCommand cmd) {
        PropertiesConfiguration pc = getHashes(cmd.getWorkflow());
        String oldOutputDirHash = pc.getString(getFileHashKey(cmd), "");
        Collection<File> files = getInputFiles(inputFiles);
        files.add(cmd.getWorkflow().getOutputDirectory(cmd));
        log.debug("Files to hash: {}", files);
        String outputDirHash = getRecursiveFileHash(files);
        log.debug("Old file hash: {} New file hash: {}", oldOutputDirHash, outputDirHash);
        boolean outputDirHashesEqual = oldOutputDirHash.equals(outputDirHash);
        String oldParameterHash = pc.getString(getParametersHashKey(cmd), "");
        String parameterHash = getParameterHash(cmd);
        log.debug("Old parameter hash: {} New parameter hash: {}", oldParameterHash, parameterHash);
        boolean upToDate = outputDirHashesEqual && oldParameterHash.equals(parameterHash);
        log.debug("Is up to date: {}: {}", cmd.getClass().getName(), upToDate);
        return upToDate;
    }

    /**
     * Used to check whether any files have changed compared to the last
     * invocation. Directories are expanded recursively to the files below
     * them.
     *
     * @param inputFiles the files and directories to include in the hash
     * @return the combined digest, or null if an exception occurred
     */
    protected String getRecursiveFileHash(Collection<File> inputFiles) {
        //list files below input and output
        SortedSet<File> files = new TreeSet<>();
        for (File file : inputFiles) {
            if (file.isDirectory()) {
                //a null extension filter matches all files; {"*"} would only match the literal extension "*"
                files.addAll(FileUtils.listFiles(file, null, true));
            } else {
                files.add(file);
            }
        }
        return digest(files);
    }

    /**
     * Calculates a byte-level SHA-1 digest over the given files.
     *
     * @param files the files to calculate the digest for
     * @return the hexadecimal, zero-padded digest, or null if an exception
     *         occurred
     */
    public String digest(Collection<File> files) {
        try {
            MessageDigest digest = MessageDigest.getInstance("SHA-1");
            for (File file : files) {
                try (InputStream is = Files.newInputStream(file.toPath(), StandardOpenOption.READ)) {
                    byte[] buffer = new byte[8192];
                    int read = 0;
                    while ((read = is.read(buffer)) > 0) {
                        digest.update(buffer, 0, read);
                    }
                } catch (IOException ioex) {
                    Logger.getLogger(ResultAwareCommandPipeline.class.getName()).log(Level.SEVERE, null, ioex);
                    return null;
                }
            }
            byte[] sha1 = digest.digest();
            BigInteger bigInt = new BigInteger(1, sha1);
            return StringUtils.leftPad(bigInt.toString(16), 40, "0");
        } catch (NoSuchAlgorithmException ex) {
            Logger.getLogger(ResultAwareCommandPipeline.class.getName()).log(Level.SEVERE, null, ex);
        }
        return null;
    }

    /**
     * Used to check whether any parameters have changed compared to the last
     * invocation.
     *
     * @param cmd the command to calculate a hash code for, based on reflection
     * @return the hash code as string
     */
    protected String getParameterHash(IFragmentCommand cmd) {
        Collection<String> fieldNames = AnnotationInspector.getRequiredConfigFieldNames(cmd.getClass());
        PublicMemberGetters<IFragmentCommand> pmg = new PublicMemberGetters<>(cmd);
        HashCodeBuilder hcb = new HashCodeBuilder();
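        //invoke each configurable field's public getter and fold its value into the hash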
        for (String fieldName : fieldNames) {
            Method m = pmg.getMethodForFieldName(fieldName);
            if (m != null) {
                try {
                    Object o = m.invoke(cmd);
                    hcb.append(o);
                    log.debug("Accessing field {} of {}. Current hash={}",
                            new Object[] { fieldName, cmd.getClass().getName(), hcb.toHashCode() });
                } catch (IllegalAccessException | IllegalArgumentException | InvocationTargetException ex) {
                    Logger.getLogger(ResultAwareCommandPipeline.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        return Integer.toString(hcb.toHashCode());
    }

    /**
     * Runs the given command, or restores its previous results from disk if
     * both its files and its parameters are unchanged.
     *
     * @param workflow the workflow
     * @param cmd      the fragment command to run
     */
    @Override
    protected void runFragmentCommand(IWorkflow workflow, IFragmentCommand cmd) {
        try {
            beforeCommand(cmd);
            TupleND<IFileFragment> results;
            if (!isUpToDate(getTmp(), cmd)) {
                if (getWorkflow().getOutputDirectory(cmd).exists()
                        && getWorkflow().getOutputDirectory(cmd).listFiles().length > 0) {
                    log.info("Deleting invalid results for {} below {}", cmd,
                            getWorkflow().getOutputDirectory(cmd));
                    try {
                        FileUtils.deleteDirectory(getWorkflow().getOutputDirectory(cmd));
                    } catch (IOException ex) {
                        log.warn("Caught IO Exception while trying to delete workflow output directory at "
                                + getWorkflow().getOutputDirectory());
                        throw new RuntimeException(ex);
                    }
                }
                long start = System.nanoTime();
                results = cmd.apply(getTmp());
                storeCommandRuntime(start, System.nanoTime(), cmd, getWorkflow());
            } else {
                log.info("Skipping, everything up to date!");
                File outputDir = getWorkflow().getOutputDirectory(cmd);
                Collection<File> inputFiles = FileUtils.listFiles(outputDir,
                        new String[] { "cdf", "CDF", "nc", "NC" }, false);
                TupleND<IFileFragment> inputFragments = new TupleND<>();
                for (File f : inputFiles) {
                    inputFragments.add(getWorkflow().getFactory().getFileFragmentFactory().create(f));
                }
                log.debug("Setting file fragments {} as next input!", inputFragments);
                results = inputFragments;
            }
            updateHashes(getTmp(), cmd);
            setTmp(results);
            log.debug("Hashes: {}", ConfigurationUtils.toString(getHashes()));
        } finally {
            afterCommand(cmd);
        }
    }

    @Override
    public void before() {
        log.info("Looking for results from previous workflow invocation...");
        if (getWorkflow().getWorkflowXmlFile().isFile()) {
            log.info("Found previous workflow.xml file!");
            if (isInputUpToDate(getInput(), getWorkflow())) {
                log.info("Input data has not changed, restoring workflow!");
                getWorkflow().load(getWorkflow().getWorkflowXmlFile());
            } else {
                try {
                    log.info("Input data has changed, deleting previous workflow output!");
                    FileUtils.deleteDirectory(getWorkflow().getOutputDirectory());
                    getWorkflow().clearResults();
                } catch (IOException ex) {
                    throw new RuntimeException(
                            "Deletion of directory " + getWorkflow().getOutputDirectory() + " failed!", ex);
                }
                getWorkflow().getOutputDirectory().mkdirs();
            }
        } else {
            log.info("Did not find results from a previous workflow instance!");
        }
        //update input file hashes
        updateWorkflowInputHashes(getInput(), getWorkflow());
        if (getExecutionServer() == null && !getWorkflow().isExecuteLocal()) {
            log.info("Launching execution infrastructure!");
            setExecutionServer(ComputeServerFactory.getComputeServer());
            File computeHostJarLocation = new File(System.getProperty("maltcms.home"), "maltcms.jar");
            if (!computeHostJarLocation.exists() || !computeHostJarLocation.isFile()) {
                throw new ExitVmException("Could not locate maltcms.jar in " + System.getProperty("maltcms.home"));
            }
            final PropertiesConfiguration cfg = new PropertiesConfiguration();
            //set execution type
            cfg.setProperty(ConfigurationKeys.KEY_EXECUTION_MODE, ExecutionType.DRMAA);
            //set location of compute host jar
            cfg.setProperty(ConfigurationKeys.KEY_PATH_TO_COMPUTEHOST_JAR, computeHostJarLocation);
            //exit to console when master server shuts down
            cfg.setProperty(ConfigurationKeys.KEY_MASTER_SERVER_EXIT_ON_SHUTDOWN, true);
            //limit the number of used compute hosts
            cfg.setProperty(ConfigurationKeys.KEY_MAX_NUMBER_OF_CHOSTS,
                    getWorkflow().getConfiguration().getInt("maltcms.pipelinethreads", 1));
            //native specs for the drmaa api
            cfg.setProperty(ConfigurationKeys.KEY_NATIVE_SPEC,
                    getWorkflow().getConfiguration().getString("mpaxs.nativeSpec", ""));
            getExecutionServer().startMasterServer(cfg);
        }
    }

}
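
Example

The skip logic above hinges on getRecursiveFileHash(Collection<File>) and digest(Collection<File>): every input and output file is streamed through a single SHA-1 digest, and a command is only re-run when that digest or the reflection-based parameter hash differs from the values persisted in the '.hashes' file. The following stand-alone sketch demonstrates the same change-detection technique using only JDK classes; the ChangeDetector class, the file name "input.cdf", and the empty stored hash are illustrative assumptions, not part of the Cross API.

import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collection;
import java.util.TreeSet;

public class ChangeDetector {

    //streams all files through one SHA-1 digest in sorted order, so a change
    //to any file's content, or to the file set itself, changes the result
    public static String digest(Collection<Path> files)
            throws IOException, NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("SHA-1");
        for (Path p : new TreeSet<>(files)) {
            try (InputStream in = Files.newInputStream(p)) {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = in.read(buffer)) > 0) {
                    md.update(buffer, 0, read);
                }
            }
        }
        //zero-padded 40-digit hex string, equivalent to the StringUtils.leftPad call above
        return String.format("%040x", new BigInteger(1, md.digest()));
    }

    public static void main(String[] args) throws Exception {
        //hypothetical input file; in the pipeline these come from the workflow
        Collection<Path> inputs = Arrays.asList(Paths.get("input.cdf"));
        //in the pipeline, the stored hash is read from the persisted '.hashes' file
        String stored = "";
        String current = digest(inputs);
        //a stage is up to date only if the stored and current hashes match
        System.out.println("up to date: " + stored.equals(current));
    }
}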