org.commonwl.view.researchobject.ROBundleService.java Source code

Java tutorial

Introduction

Here is the source code for org.commonwl.view.researchobject.ROBundleService.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.commonwl.view.researchobject;

import static org.apache.commons.io.FileUtils.readFileToString;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.taverna.robundle.Bundle;
import org.apache.taverna.robundle.Bundles;
import org.apache.taverna.robundle.manifest.Agent;
import org.apache.taverna.robundle.manifest.Manifest;
import org.apache.taverna.robundle.manifest.PathAnnotation;
import org.apache.taverna.robundle.manifest.PathMetadata;
import org.apache.taverna.robundle.manifest.Proxy;
import org.commonwl.view.WebConfig.Format;
import org.commonwl.view.cwl.CWLTool;
import org.commonwl.view.cwl.CWLValidationException;
import org.commonwl.view.cwl.RDFService;
import org.commonwl.view.git.GitDetails;
import org.commonwl.view.git.GitSemaphore;
import org.commonwl.view.git.GitService;
import org.commonwl.view.graphviz.GraphVizService;
import org.commonwl.view.workflow.Workflow;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.api.errors.GitAPIException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

/**
 * Service handling Research Object Bundles.
 *
 * <p>Builds an RO bundle for a workflow out of a checked-out Git repository
 * (repository files, visualisation images, cwltool/RDF annotations and a
 * Git2PROV history link) and saves bundles to the configured storage folder.
 */
@Service
public class ROBundleService {

    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    // Pattern for extracting version from a cwl file (compiled once for all instances)
    private static final String CWL_VERSION_REGEX = "cwlVersion:\\s*\"?(?:cwl:)?([^\\s\"]+)\"?";
    private static final Pattern CWL_VERSION_PATTERN = Pattern.compile(CWL_VERSION_REGEX);

    // Services
    private final GraphVizService graphVizService;
    private final GitService gitService;
    private final RDFService rdfService;
    private final CWLTool cwlTool;
    private final GitSemaphore gitSemaphore;

    // Configuration variables
    private final Agent appAgent;
    private final int singleFileSizeLimit;
    private final Path bundleStorage;

    /**
     * Creates an instance of this service which handles Research Object Bundles
     * @param bundleStorage The configured storage location for bundles
     * @param appName The name of the application from properties, for attribution
     * @param appURL The URL of the application from properties, for attribution
     * @param singleFileSizeLimit The file size limit for each file in the RO bundle
     * @param graphVizService Service used to render the workflow visualisations
     * @param gitService Service used to access the Git repository and its authors
     * @param rdfService Service used to query the RDF description of the workflow
     * @param gitSemaphore Semaphore guarding access to a repository by URL
     * @param cwlTool Wrapper used to obtain the packed version of the workflow
     * @throws URISyntaxException Error in creating URI for appURL
     */
    @Autowired
    public ROBundleService(@Value("${bundleStorage}") Path bundleStorage,
            @Value("${applicationName}") String appName, @Value("${applicationURL}") String appURL,
            @Value("${singleFileSizeLimit}") int singleFileSizeLimit, GraphVizService graphVizService,
            GitService gitService, RDFService rdfService, GitSemaphore gitSemaphore, CWLTool cwlTool)
            throws URISyntaxException {
        this.bundleStorage = bundleStorage;
        this.appAgent = new Agent(appName);
        this.appAgent.setUri(new URI(appURL));
        this.singleFileSizeLimit = singleFileSizeLimit;
        this.graphVizService = graphVizService;
        this.gitService = gitService;
        this.rdfService = rdfService;
        this.gitSemaphore = gitSemaphore;
        this.cwlTool = cwlTool;
    }

    /**
     * Creates a new research object bundle for a workflow from a Git repository.
     *
     * <p>URI syntax errors and Git errors are logged rather than propagated; in
     * that case the bundle built up to the point of failure is still returned.
     *
     * @param workflow The workflow to create the research object for
     * @param gitInfo The Git details (repository URL and path) of the workflow
     * @return The constructed bundle
     * @throws IOException Errors reading the repository or writing to the bundle
     */
    public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOException {

        // Create a new RO bundle
        Bundle bundle = Bundles.createBundle();
        Manifest manifest = bundle.getManifest();

        // Simplified attribution for RO bundle
        try {
            manifest.setId(new URI(workflow.getPermalink()));

            // Tool attribution in createdBy
            manifest.setCreatedBy(appAgent);

            // Retrieval Info
            // TODO: Make this importedBy/On/From
            manifest.setRetrievedBy(appAgent);
            manifest.setRetrievedOn(manifest.getCreatedOn());
            manifest.setRetrievedFrom(new URI(workflow.getPermalink(Format.ro)));

            // Make a directory in the RO bundle to store the files
            Path bundlePath = bundle.getRoot().resolve("workflow");
            Files.createDirectory(bundlePath);

            // Add the files from the repo to this workflow
            Set<HashableAgent> authors = new HashSet<>();

            boolean safeToAccess = gitSemaphore.acquire(gitInfo.getRepoUrl());
            try {
                Git gitRepo = gitService.getRepository(workflow.getRetrievedFrom(), safeToAccess);
                Path relativePath = Paths.get(FilenameUtils.getPath(gitInfo.getPath()));
                Path gitPath = gitRepo.getRepository().getWorkTree().toPath().resolve(relativePath);
                addFilesToBundle(gitInfo, bundle, bundlePath, gitRepo, gitPath, authors, workflow);
            } finally {
                // Always release, even when copying the files failed
                gitSemaphore.release(gitInfo.getRepoUrl());
            }

            // Add combined authors
            manifest.setAuthoredBy(new ArrayList<>(authors));

            // Add visualisation images
            addVisualisation(bundle, workflow, "png", Format.png);
            addVisualisation(bundle, workflow, "svg", Format.svg);

            // Add annotation files produced by cwltool and the RDF store
            String rawUrl = cwltoolUrl(workflow.getRetrievedFrom());
            List<PathAnnotation> manifestAnnotations = new ArrayList<>();
            try {
                addAggregation(bundle, manifestAnnotations, "merged.cwl", cwlTool.getPackedVersion(rawUrl));
            } catch (CWLValidationException ex) {
                // Pass the exception itself: a bare message argument without a
                // "{}" placeholder would be dropped from the log output
                logger.error("Could not pack workflow when creating Research Object", ex);
            }
            String rdfUrl = workflow.getIdentifier();
            if (rdfService.graphExists(rdfUrl)) {
                // Decode explicitly as UTF-8 instead of relying on the platform charset
                // NOTE(review): assumes getModel returns the serialised model as bytes - confirm
                addAggregation(bundle, manifestAnnotations, "workflow.ttl",
                        new String(rdfService.getModel(rdfUrl, "TURTLE"), StandardCharsets.UTF_8));
            }
            manifest.setAnnotations(manifestAnnotations);

            // Git2prov history
            List<Path> history = new ArrayList<>();
            // FIXME: Below is a a hack to pretend the URI is a Path
            String git2prov = "http://git2prov.org/git2prov?giturl=" + gitInfo.getRepoUrl()
                    + "&serialization=PROV-JSON";
            Path git2ProvPath = bundle.getRoot().relativize(bundle.getRoot().resolve(git2prov));
            history.add(git2ProvPath);
            manifest.setHistory(history);

        } catch (URISyntaxException ex) {
            logger.error("Error creating URI for RO Bundle", ex);
        } catch (GitAPIException ex) {
            logger.error("Error getting repository to create RO Bundle", ex);
        }

        // Return the completed bundle
        return bundle;

    }

    /**
     * Renders the workflow visualisation in one image format and stores it at
     * the bundle root as {@code visualisation.<extension>}.
     * @param bundle The RO bundle to add the image to
     * @param workflow The workflow being visualised
     * @param extension The image file extension, also passed to GraphViz as the output format
     * @param format The permalink format recorded as the image's source
     * @throws IOException Errors rendering or copying the image
     * @throws URISyntaxException The permalink is not a valid URI
     */
    private void addVisualisation(Bundle bundle, Workflow workflow, String extension, Format format)
            throws IOException, URISyntaxException {
        File image = graphVizService.getGraph(workflow.getID() + "." + extension,
                workflow.getVisualisationDot(), extension);
        Path target = bundle.getRoot().resolve("visualisation." + extension);
        Files.copy(image.toPath(), target);
        PathMetadata imageAggr = bundle.getManifest().getAggregation(target);
        imageAggr.setRetrievedFrom(new URI(workflow.getPermalink(format)));
    }

    /**
     * Builds the URL handed to cwltool: the raw file URL, followed by the packed
     * workflow ID as a fragment when one is present.
     * @param wfDetails The Git details the workflow was retrieved from
     * @return The URL to run cwltool against
     */
    private String cwltoolUrl(GitDetails wfDetails) {
        String rawUrl = wfDetails.getRawUrl();
        String packedWorkflowID = wfDetails.getPackedId();
        // An empty ID is treated like a missing one (charAt(0) would throw on it)
        if (packedWorkflowID != null && !packedWorkflowID.isEmpty()) {
            if (!packedWorkflowID.startsWith("#")) {
                rawUrl += "#";
            }
            rawUrl += packedWorkflowID;
        }
        return rawUrl;
    }

    /**
     * Recursively add the contents of a repository directory to this bundle
     * @param gitDetails The Git information for the repository
     * @param bundle The RO bundle to add files/directories to
     * @param bundlePath The current path within the RO bundle
     * @param gitRepo The Git repository
     * @param repoPath The current directory on disk within the checked-out repository
     * @param authors The combined set of authors for all the files
     * @param workflow The workflow the bundle is being created for
     * @throws IOException Errors listing the directory or writing to the bundle
     */
    private void addFilesToBundle(GitDetails gitDetails, Bundle bundle, Path bundlePath, Git gitRepo, Path repoPath,
            Set<HashableAgent> authors, Workflow workflow) throws IOException {
        File[] files = repoPath.toFile().listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read
            throw new IOException("Could not list contents of repository directory " + repoPath);
        }

        for (File file : files) {
            if (file.getName().equals(".git")) {
                continue;
            }

            if (file.isDirectory()) {
                // Create a new folder in the RO for this directory
                Path newBundlePath = bundlePath.resolve(file.getName());
                Files.createDirectory(newBundlePath);

                // Create git details object for subfolder
                GitDetails subfolderGitDetails = new GitDetails(gitDetails.getRepoUrl(), gitDetails.getBranch(),
                        Paths.get(gitDetails.getPath()).resolve(file.getName()).toString());

                // Add all files in the subdirectory to this new folder
                addFilesToBundle(subfolderGitDetails, bundle, newBundlePath, gitRepo,
                        repoPath.resolve(file.getName()), authors, workflow);
            } else {
                addFileToBundle(gitDetails, bundle, bundlePath, gitRepo, repoPath, authors, workflow, file);
            }
        }
    }

    /**
     * Add a single repository file to the bundle, either as a copy or, when it
     * exceeds the size limit, as an external link, and record its metadata.
     * @param gitDetails The Git information for the folder containing the file
     * @param bundle The RO bundle to add the file to
     * @param bundlePath The folder within the RO bundle the file belongs to
     * @param gitRepo The Git repository
     * @param repoPath The directory on disk containing the file
     * @param authors The combined set of authors, extended with this file's authors
     * @param workflow The workflow the bundle is being created for
     * @param file The file to add
     * @throws IOException Errors reading the file or writing to the bundle
     */
    private void addFileToBundle(GitDetails gitDetails, Bundle bundle, Path bundlePath, Git gitRepo, Path repoPath,
            Set<HashableAgent> authors, Workflow workflow, File file) throws IOException {
        try {
            // Where to store the new file
            Path bundleFilePath = bundlePath.resolve(file.getName());
            Path gitFolder = Paths.get(gitDetails.getPath());
            String relativePath = gitFolder.resolve(file.getName()).toString();
            Path gitPath = bundlePath.getRoot().resolve(relativePath); // would start with /

            // Get direct URL permalink. The multi-argument constructor percent-encodes
            // characters that are illegal in a URI (e.g. spaces in file names), which
            // would otherwise make the file be skipped with a URISyntaxException.
            URI rawURI = new URI("https", "w3id.org", "/cwl/view/git/" + workflow.getLastCommit() + gitPath,
                    "format=raw", null);

            boolean cwl = FilenameUtils.getExtension(file.getName()).equals("cwl");

            // Text of the file, only loaded for bundled cwl files (version detection)
            String fileContent = null;
            PathMetadata aggregation;

            // Download or externally link if oversized
            if (file.length() <= singleFileSizeLimit) {
                // Copy the bytes unchanged so binary files and non-UTF-8 text are
                // not corrupted by a decode/encode round trip
                Files.copy(file.toPath(), bundleFilePath, StandardCopyOption.REPLACE_EXISTING);
                if (cwl) {
                    fileContent = readFileToString(file, StandardCharsets.UTF_8.name());
                }
                aggregation = bundle.getManifest().getAggregation(bundleFilePath);
            } else {
                logger.info("File {} is too large to download - {}/{}, linking externally to RO bundle",
                        file.getName(), FileUtils.byteCountToDisplaySize(file.length()),
                        FileUtils.byteCountToDisplaySize(singleFileSizeLimit));

                // Set information for this file in the manifest
                aggregation = bundle.getManifest().getAggregation(rawURI);
                Proxy bundledAs = new Proxy();
                bundledAs.setURI();
                // Record the folder inside the bundle, not the server's local checkout path
                bundledAs.setFolder(bundlePath);
                aggregation.setBundledAs(bundledAs);
            }

            // Set retrieved information for this file in the manifest. Done before the
            // author lookup so a malformed author URI cannot prevent it being recorded.
            aggregation.setRetrievedFrom(rawURI);
            aggregation.setRetrievedBy(appAgent);
            aggregation.setRetrievedOn(aggregation.getCreatedOn());

            // Special handling for cwl files
            if (cwl) {
                // Correct mime type (no official standard for yaml)
                aggregation.setMediatype("text/x-yaml");

                // Add conformsTo for version extracted from regex
                if (fileContent != null) {
                    Matcher m = CWL_VERSION_PATTERN.matcher(fileContent);
                    if (m.find()) {
                        aggregation.setConformsTo(new URI("https://w3id.org/cwl/" + m.group(1)));
                    }
                }
            }

            try {
                // Add authors from git commits to the file
                Set<HashableAgent> fileAuthors = gitService.getAuthors(gitRepo, gitPath.toString());

                if (cwl) {
                    // Attempt to get authors from cwl description - takes priority.
                    // substring(10) drops the first ten characters of the bundle path,
                    // presumably the leading "/workflow/" folder made in createBundle.
                    ResultSet descAuthors = rdfService.getAuthors(
                            bundleFilePath.toString().substring(10),
                            workflow.getIdentifier());
                    // Only the first result row is used
                    if (descAuthors.hasNext()) {
                        QuerySolution authorSoln = descAuthors.nextSolution();
                        HashableAgent newAuthor = new HashableAgent();
                        if (authorSoln.contains("name")) {
                            newAuthor.setName(authorSoln.get("name").toString());
                        }
                        if (authorSoln.contains("email")) {
                            newAuthor.setUri(new URI(authorSoln.get("email").toString()));
                        }
                        if (authorSoln.contains("orcid")) {
                            newAuthor.setOrcid(new URI(authorSoln.get("orcid").toString()));
                        }
                        // Replace any equal author found from git with the described one
                        fileAuthors.remove(newAuthor);
                        fileAuthors.add(newAuthor);
                    }
                }

                authors.addAll(fileAuthors);
                aggregation.setAuthoredBy(new ArrayList<>(fileAuthors));
            } catch (GitAPIException ex) {
                logger.error("Could not get commits for file " + repoPath, ex);
            }

        } catch (URISyntaxException ex) {
            logger.error("Error creating URI for RO Bundle", ex);
        }
    }

    /**
     * Save the Research Object Bundle to disk
     * @param roBundle The bundle to be saved
     * @return The path to the research object
     * @throws IOException Any errors in saving
     */
    public Path saveToFile(Bundle roBundle) throws IOException {
        // Random name so bundles saved at the same time do not collide
        String fileName = "bundle-" + java.util.UUID.randomUUID() + ".zip";
        Path bundleLocation = Files.createFile(bundleStorage.resolve(fileName));
        Bundles.closeAndSaveBundle(roBundle, bundleLocation);
        return bundleLocation;
    }

    /**
     * Write an annotation file into the Research Object Bundle and register it
     * as an annotation about the bundle itself
     * @param roBundle The bundle to add to
     * @param manifestAnnotations The list of manifest annotations to append to
     * @param fileName The file name of the annotation within the annotations folder
     * @param content The text to store as the body of the annotation
     * @throws IOException Errors accessing the bundle
     */
    private void addAggregation(Bundle roBundle, List<PathAnnotation> manifestAnnotations, String fileName,
            String content) throws IOException {
        Path annotationFile = Bundles.getAnnotations(roBundle).resolve(fileName);
        Bundles.setStringValue(annotationFile, content);

        PathAnnotation annotation = new PathAnnotation();
        annotation.setContent(annotationFile);
        annotation.setAbout(roBundle.getManifest().getId());
        annotation.generateAnnotationId();
        manifestAnnotations.add(annotation);
    }

}