org.dataconservancy.packaging.tool.impl.generator.DomainObjectResourceBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.dataconservancy.packaging.tool.impl.generator.DomainObjectResourceBuilder.java

Source

/*
 * Copyright 2015 Johns Hopkins University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dataconservancy.packaging.tool.impl.generator;

import java.io.IOException;
import java.io.InputStream;

import java.net.URI;
import java.net.URISyntaxException;

import java.nio.file.Paths;

import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.collections.MapUtils;

import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.riot.RDFFormat;
import org.apache.jena.util.ResourceUtils;

import org.dataconservancy.packaging.tool.api.generator.PackageResourceType;
import org.dataconservancy.packaging.tool.model.PackageToolException;
import org.dataconservancy.packaging.tool.model.ipm.Node;
import org.dataconservancy.packaging.tool.ontologies.Ontologies;
import org.dataconservancy.packaging.tool.ser.PackageStateSerializer;

import static org.dataconservancy.packaging.tool.impl.generator.IPMUtil.path;
import static org.dataconservancy.packaging.tool.impl.generator.RdfUtil.bare;
import static org.dataconservancy.packaging.tool.impl.generator.RdfUtil.cut;
import static org.dataconservancy.packaging.tool.impl.generator.RdfUtil.determineSerialization;
import static org.dataconservancy.packaging.tool.impl.generator.RdfUtil.selectLocal;
import static org.dataconservancy.packaging.tool.impl.generator.RdfUtil.toInputStream;
import static org.dataconservancy.packaging.tool.impl.generator.RemediationUtil.remediatePath;
import static org.dataconservancy.packaging.tool.impl.generator.RemediationUtil.unique;
import static org.dataconservancy.packaging.tool.model.BagItParameterNames.BAGIT_PROFILE_ID;

/**
 * Serializes domain object graphs into individual resources in a bag.
 * <p>
 * A "domain object graph" is a graph that contains all triples with the a
 * domain object as a URI, all triples that are hash fragments of the domain
 * object URI, and any triples with blank node subjects that are traversible
 * from either.
 * </p>
 * 
 * @author apb
 * @version $Id$
 */
class DomainObjectResourceBuilder implements NodeVisitor {

    public String OBJECT_PATH = "obj/";

    public String BINARY_PATH = "bin/";

    PackageStateSerializer serializer;

    @SuppressWarnings("unchecked")
    Map<String, String> PREFIX_MAP = MapUtils.invertMap(Ontologies.PREFIX_MAP);

    public void setPackageStateSerializer(PackageStateSerializer ser) {
        this.serializer = ser;
    }

    /* Reserve and translate URIs from opaque to resolvable via the Assembler. */
    @Override
    public void init(PackageModelBuilderState state) {

        /*
         * First, build a sorted map of all resources in the model. We'll be
         * doing URI swapping/remapping to be consistent with resources and
         * linking in the bag.
         */
        TreeMap<String, Resource> originalResources = new TreeMap<>();
        state.domainObjects.listSubjects().forEachRemaining(s -> originalResources.put(s.toString(), s));
        state.domainObjects.listObjects().filterKeep(o -> o.isResource() && !o.isAnon())
                .forEachRemaining(o -> originalResources.put(o.toString(), o.asResource()));

        state.tree.walk(node -> {

            /* Skip over removed nodes */
            if (node.isIgnored() || node.getDomainObject() == null) {
                if (node.getDomainObject() != null) {
                    /* Remove the domain object graph */
                    Model ignored = cut(state.domainObjects,
                            selectLocal(state.domainObjects.getResource(node.getDomainObject().toString())));

                    /*
                     * Remove triples that involve a subject defined in
                     * it
                     */
                    ignored.listSubjects().filterKeep(RDFNode::isURIResource)
                            .forEachRemaining(r -> state.domainObjects.removeAll(null, null, r));
                }
                return;
            }

            /* Sanity check */
            if (node.getFileInfo() != null && !Paths.get(node.getFileInfo().getLocation()).toFile().exists()) {
                throw new RuntimeException(
                        "IPM node points to file location that doesn't exist: " + node.getFileInfo().getLocation());
            }

            /* Get the former domain object URI */
            URI originalDomainObjectURI = node.getDomainObject();

            /* This is where the domain object will be serialized */
            URI newDomainObjectURI = null;
            try {
                newDomainObjectURI = reserveObjectResource(node, state);
            } catch (URISyntaxException e) {
                throw new RuntimeException(e.getMessage(), e);
            }

            state.domainObjectSerializationLocations.put(node.getIdentifier(), newDomainObjectURI);

            if (node.getFileInfo() != null && node.getFileInfo().isFile()) {
                try {
                    URI newFileLocation = createBinaryResource(node, state);

                    URI originalFileLocation = node.getFileInfo().getLocation();

                    state.renamedContentLocations.put(originalFileLocation, newFileLocation);

                    if (!state.domainObjects
                            .containsResource(state.domainObjects.getResource(originalFileLocation.toString()))) {

                        /*
                         * If the file content location is not linked
                         * to, then the domain object URI *is* the
                         * binary URI
                         */
                        newDomainObjectURI = newFileLocation;
                    } else {
                        /*
                         * We replace references to file location with
                         * the binary URI
                         */
                        remap(bare(node.getFileInfo().getLocation().toString()), newFileLocation.toString(),
                                originalResources, state.renamedResources);

                    }

                    node.getFileInfo().setLocation(newFileLocation);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            } else if (node.getFileInfo() != null && node.getFileInfo().isDirectory()) {
                /* It's a directory, so map it to a directory bag URI */
                URI newLocation = state.assembler.reserveDirectory(BINARY_PATH + path(node, ""),
                        PackageResourceType.DATA);

                state.renamedContentLocations.put(node.getFileInfo().getLocation(), newLocation);
                node.getFileInfo().setLocation(newLocation);
            }

            node.setDomainObject(newDomainObjectURI);

            /*
             * Rebase all URIs and hash URIs to the assembler-provided
             * URI
             */
            if (originalDomainObjectURI != null && node.getDomainObject() != null) {
                remap(bare(originalDomainObjectURI.toString()), node.getDomainObject().toString(),
                        originalResources, state.renamedResources);
            }
        });
    }

    private URI createBinaryResource(Node node, PackageModelBuilderState state)
            throws IOException, URISyntaxException {
        URI resource;
        try {
            resource = state.assembler.createResource(BINARY_PATH + remediatePath(path(node, ""), profileId(state)),
                    PackageResourceType.DATA, node.getFileInfo().getLocation().toURL().openStream());
        } catch (PackageToolException e) {
            if (e.getCode() == 409) {
                // handle duplicate reservation
                String uniquePath = unique(node, remediatePath(path(node, ""), profileId(state)));
                resource = state.assembler.createResource(BINARY_PATH + uniquePath, PackageResourceType.DATA,
                        node.getFileInfo().getLocation().toURL().openStream());
            } else {
                throw e;
            }

        }

        return resource;
    }

    private URI reserveObjectResource(Node node, PackageModelBuilderState state) throws URISyntaxException {
        URI resource;
        try {
            resource = state.assembler
                    .reserveResource(
                            OBJECT_PATH + remediatePath(path(node,
                                    "." + determineSerialization(state.params, RDFFormat.TURTLE_PRETTY).getLang()
                                            .getFileExtensions().get(0)),
                                    profileId(state)),
                            PackageResourceType.DATA);
        } catch (PackageToolException e) {
            if (e.getCode() == 409) {
                // handle duplicate reservation
                String uniquePath = unique(node,
                        remediatePath(path(node, "." + determineSerialization(state.params, RDFFormat.TURTLE_PRETTY)
                                .getLang().getFileExtensions().get(0)), profileId(state)));
                resource = state.assembler.reserveResource(OBJECT_PATH + uniquePath, PackageResourceType.DATA);
            } else {
                throw e;
            }
        }

        return resource;
    }

    private static String profileId(PackageModelBuilderState state) {
        return state.params.getParam(BAGIT_PROFILE_ID, 0);
    }

    /* Serialize the domain object, and save the binary content */
    @Override
    public void visitNode(Node node, PackageModelBuilderState state) {

        if (node.isIgnored()) {
            return;
        }

        Resource primaryDomainObject = state.domainObjects.getResource(node.getDomainObject().toString());

        /* Cut the domain object graph out of the graph of domain objects */
        Model domainObjectGraph = cut(state.domainObjects, selectLocal(primaryDomainObject));

        /*
         * If the domain object is serialized at a location that is identical to
         * its URI, then use the null relative URI in its representation.
         */
        if (node.getDomainObject().equals(state.domainObjectSerializationLocations.get(node.getIdentifier()))) {
            String baseURI = bare(primaryDomainObject.getURI());
            domainObjectGraph.listSubjects().toSet().stream()
                    .filter(subject -> bare(subject.toString()).equals(baseURI)).forEach(subject -> ResourceUtils
                            .renameResource(subject, subject.toString().replaceFirst(baseURI, "")));
        }

        try (InputStream stream = toInputStream(domainObjectGraph,
                determineSerialization(state.params, RDFFormat.TURTLE))) {
            state.assembler.putResource(state.domainObjectSerializationLocations.get(node.getIdentifier()), stream);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

    }

    /*
     * Verify that we have exhausted all domain object triples. If not,
     * something is amiss!
     */
    @Override
    public void finish(PackageModelBuilderState state) {
        if (state.domainObjects.listStatements().hasNext()) {
            throw new RuntimeException("Did not serialize all triples! " + state.domainObjects);
        }
    }

    private static void remap(String oldBaseURI, String newBaseURI, TreeMap<String, Resource> resources,
            Map<String, String> renameMap) {

        Map<String, Resource> toReplace = new HashMap<>();

        /* Consider the the URI plus any hash fragments */
        toReplace.putAll(resources.subMap(oldBaseURI, true, oldBaseURI, true));
        toReplace.putAll(resources.subMap(oldBaseURI + "#", oldBaseURI + "#" + Character.MAX_VALUE));

        /* Swap out the base URI for each matching resource */
        toReplace.entrySet().forEach(res -> {
            String newURI = res.getKey().replaceFirst(oldBaseURI, Matcher.quoteReplacement(newBaseURI));
            renameMap.put(res.getValue().toString(), newURI);
            ResourceUtils.renameResource(res.getValue(), newURI);
        });
    }
}