Java tutorial
/* * Copyright 2017 Johns Hopkins University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.dataconservancy.packaging.impl.deposit; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.http.entity.ContentType.parse; import static org.apache.jena.riot.RDFLanguages.contentTypeToLang; import static org.dataconservancy.packaging.ingest.PackagedResource.Type.NONRDFSOURCE; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URLEncoder; import java.util.Arrays; import java.util.Collections; import java.util.Map; import java.util.Set; import org.fcrepo.client.FcrepoClient; import org.fcrepo.client.FcrepoOperationFailedException; import org.fcrepo.client.FcrepoResponse; import org.dataconservancy.packaging.ingest.DepositFactory; import org.dataconservancy.packaging.ingest.Depositor; import org.dataconservancy.packaging.ingest.PackagedResource; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.Statement; import org.apache.jena.riot.RDFDataMgr; import org.apache.jena.riot.system.StreamRDFLib; import org.apache.jena.riot.writer.NTriplesWriter; import org.apache.jena.util.ResourceUtils; import org.osgi.service.component.annotations.Activate; import org.osgi.service.component.annotations.Component; import org.osgi.service.component.annotations.ConfigurationPolicy; import org.osgi.service.metatype.annotations.AttributeDefinition; import org.osgi.service.metatype.annotations.Designate; import org.osgi.service.metatype.annotations.ObjectClassDefinition; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @ObjectClassDefinition(name = "Something") @interface Config { @AttributeDefinition(name = "Fedora Base URI", description = "Fedora repository root") String baseUri() default "http//localhost:8080/fcrepo/rest"; @AttributeDefinition(name = "Use Transactions", description = "Perform deposits in a transactioj") boolean useTransactions() default true; @AttributeDefinition(name = "Use sparql patch", description = "Use sparql patch instead of PUT for modifying resources") boolean useSparqlPatch() default true; } /** * Deposit factory into Fedora implementations. * * @author apb@jhu.edu */ @Designate(ocd = Config.class) @Component(configurationPolicy = ConfigurationPolicy.OPTIONAL) public class FedoraDepositFactory implements DepositFactory { private String baseUri = "http://localhost:8080/fcrepo/rest"; private static final Logger LOG = LoggerFactory.getLogger(FedoraDepositFactory.class); /** * Set the Fedora baseURI. * * @param base the base URI. */ public void setBaseUri(final String base) { this.baseUri = base; } /** * Whether to use Fedora transaction/batch operations. * * @param useTransactions true to use transactions */ public void setUseTransactions(final boolean useTransactions) { this.doTx = useTransactions; } /** * Use SPARQL/Update PATCH requests. * * @param useSparql if True, updates to objects will use SPARQL/Update PATCH. */ public void setUseSparql(final boolean useSparql) { this.setUseTransactions(true); } public boolean doTx = true; public boolean useSparql = true; /** * Configure via OSGi. * * @param config configuration params. */ @Activate public void configure(final Config config) { setBaseUri(config.baseUri()); setUseTransactions(this.doTx = config.useTransactions()); setUseSparql(this.useSparql = config.useSparqlPatch()); } @Override public Depositor newDepositor(final URI depositInto, final Map<String, Object> context) { final FcrepoClient client = FcrepoClient.client().build(); final String txStart = baseUri + "/fcr:tx"; URI txBase = URI.create(baseUri); if (doTx) { try (FcrepoResponse response = client.post(URI.create(txStart)).perform()) { txBase = response.getLocation(); } catch (final Exception e) { throw new RuntimeException(e); } } return new TxDepositor(txBase, depositInto, client); } private class TxDepositor implements Depositor { private final URI txBase; private final URI txDepositInto; private final FcrepoClient client; private TxDepositor(final URI txBase, final URI canonicalDepositInto, final FcrepoClient client) { this.txBase = txBase; this.client = client; // This is a little hairy, but there's no real alternative! txDepositInto = URI.create(canonicalDepositInto.toString().replace(baseUri, txBase.toString())); } @Override public DepositedResource deposit(final PackagedResource resource, final URI parent) { final URI depositInto = parent == null ? txDepositInto : parent; final DepositedResource deposited = new DepositedResource(); deposited.uri = doDeposit(resource, depositInto); if (resource.getDescription() != null) { LOG.debug("Deposited binary {} has description, finding description resource in repository", deposited.uri, deposited.describedBy); try (FcrepoResponse response = client.head(deposited.uri).perform()) { deposited.describedBy = response.getLinkHeaders("describedby").get(0); LOG.debug("Deposited binary has description {}", deposited.describedBy); doReplace(resource.getDescription(), deposited.uri, deposited.describedBy); } catch (final Exception e) { throw new RuntimeException("Replace failed", e); } } return deposited; } private URI doDeposit(final PackagedResource resource, final URI parent) { LOG.debug("Depositing into {} into {}", resource.getURI(), parent); try (InputStream content = resource.getBody(); FcrepoResponse r = client.post(parent).slug(fileName(resource)) .filename(fileNameIfBinary(resource)).body(content, resource.getMediaType()) .perform()) { checkError(r); LOG.debug("Successfully deposited {} into {} as {}", resource.getURI(), parent, r.getLocation()); return r.getLocation(); } catch (final Exception e) { throw new RuntimeException(e); } } private String fileNameIfBinary(final PackagedResource resource) { if (NONRDFSOURCE.equals(resource.getType())) { return fileName(resource); } return null; } private void doReplace(final PackagedResource resource, final URI nonrdfsource, final URI target) { LOG.debug("Updating contents of {} with {}", target, resource.getURI()); try (FcrepoResponse r = client.patch(target).body(sparqlAdd(resource, nonrdfsource)).perform()) { checkError(r); LOG.debug("Done updating contents of {} with {}", target, resource.getURI()); } catch (final Exception e) { throw new RuntimeException( "Could not update contents of resource " + target + ": " + e.getMessage(), e); } } private InputStream sparqlAdd(final PackagedResource resource, final URI into) throws IOException { final ByteArrayOutputStream out = new ByteArrayOutputStream(); out.write("INSERT DATA {\n".getBytes(UTF_8)); try (InputStream content = resource.getBody()) { RDFDataMgr.parse(StreamRDFLib.writer(out), content, into.toString(), contentTypeToLang(parse(resource.getMediaType()).getMimeType())); } catch (final IOException e) { throw new RuntimeException("Could not read package contents of " + resource.getURI(), e); } out.write("}".getBytes(UTF_8)); return new ByteArrayInputStream(out.toByteArray()); } @Override public void commit() { if (doTx) { final URI commitUri = URI.create(txBase.toString() + "/fcr:tx/fcr:commit"); try (FcrepoResponse r = client.post(commitUri).perform()) { checkError(r); } catch (final Exception e) { throw new RuntimeException(e); } } } @Override public void rollback() { if (doTx) { final URI rollbackUri = URI.create(txBase.toString() + "/fcr:tx/fcr:rollback"); try (FcrepoResponse r = client.post(rollbackUri).perform()) { checkError(r); } catch (final Exception e) { throw new RuntimeException(e); } } } @Override public void remap(final URI toRemap, final Map<URI, URI> localToRepository) { final Model updatedModel = ModelFactory.createDefaultModel(); final Model originalModel = ModelFactory.createDefaultModel(); // GET an instance of the object without server-managed or ldp containment triples. // It's darn near impossible to do a PUT that contains *any* of these with Fedora, so // use a minimal approach. try (FcrepoResponse response = client.get(toRemap).preferMinimal() .preferRepresentation(Collections.emptyList(), Arrays.asList(URI.create("http://fedora.info/definitions/v4/repository#ServerManaged"), URI.create("http://www.w3.org/ns/ldp#PreferContainment"))) .accept("text/turtle").perform()) { RDFDataMgr.parse(StreamRDFLib.graph(updatedModel.getGraph()), response.getBody(), contentTypeToLang(parse(response.getContentType()).getMimeType())); } catch (final Exception e) { throw new RuntimeException("Could not read object to remap: " + toRemap, e); } // If we're using sparql update, keep a copy of the original model around to take a diff. if (useSparql) { originalModel.add(updatedModel); } // Find all the package-local URIs to remap final Set<String> localUris = updatedModel.listObjects().filterKeep(RDFNode::isURIResource) .mapWith(RDFNode::asResource).mapWith(Resource::getURI) .filterKeep(uri -> localToRepository.containsKey(URI.create(withoutHash(uri)))).toSet(); // If we found any, re-map and update! if (!localUris.isEmpty()) { for (final String localURI : localUris) { final String localBase = withoutHash(localURI); final String resolved = localURI.replace(localBase, localToRepository.get(URI.create(localBase)).toString()); ResourceUtils.renameResource(updatedModel.getResource(localURI), resolved); } if (!useSparql) { final ByteArrayOutputStream out = new ByteArrayOutputStream(); updatedModel.write(out, "TTL"); try (FcrepoResponse response = client.put(toRemap) .body(new ByteArrayInputStream(out.toByteArray()), "text/turtle").preferLenient() .perform()) { checkError(response); } catch (final Exception e) { throw new RuntimeException(e); } } else { try (FcrepoResponse response = client.patch(toRemap) .body(makeSparqlPatch(originalModel, updatedModel)).perform()) { checkError(response); } catch (final Exception e) { throw new RuntimeException(e); } } } } private String withoutHash(final String uri) { if (uri.contains("#")) { return uri.substring(0, uri.indexOf('#')); } else { return uri; } } } // Creates a SPARQL/Update patch which, when applied to the original model, result in the updated. private static InputStream makeSparqlPatch(final Model orig, final Model updated) { try { final ByteArrayOutputStream body = new ByteArrayOutputStream(); IOUtils.write("INSERT DATA {\n".getBytes(UTF_8), body); /* New triples in 'updated' but not in 'orig' */ NTriplesWriter.write(body, updated.listStatements().filterDrop(s -> orig.contains(s)).mapWith(Statement::asTriple)); IOUtils.write("};\n".getBytes(UTF_8), body); IOUtils.write("DELETE DATA {\n".getBytes(UTF_8), body); /* Triples in 'orig' but not in 'updated' */ NTriplesWriter.write(body, orig.listStatements().filterDrop(s -> updated.contains(s)).mapWith(Statement::asTriple)); IOUtils.write("}\n".getBytes(UTF_8), body); return new ByteArrayInputStream(body.toByteArray()); } catch (final IOException e) { throw new RuntimeException("Error creating spaeql patch", e); } } private static String fileName(final PackagedResource resource) { String name = new File(resource.getURI().getPath()).getName(); if (!NONRDFSOURCE.equals(resource.getType())) { name = FilenameUtils.removeExtension(name); } try { return URLEncoder.encode(name, "UTF-8"); } catch (final Exception e) { throw new RuntimeException(e); } } private void checkError(final FcrepoResponse response) throws FcrepoOperationFailedException, IOException { if (response.getStatusCode() >= 400) { throw new FcrepoOperationFailedException(response.getUrl(), response.getStatusCode(), IOUtils.toString(response.getBody(), "utf8")); } } }