Java tutorial
/* * Copyright 2015 DuraSpace, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fcrepo.migration.foxml; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLEventWriter; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import javax.xml.stream.events.XMLEvent; import org.apache.commons.codec.binary.Base64OutputStream; import org.fcrepo.migration.ContentDigest; import org.fcrepo.migration.DatastreamInfo; import org.fcrepo.migration.DatastreamVersion; import org.fcrepo.migration.DefaultContentDigest; import org.fcrepo.migration.DefaultObjectInfo; import org.fcrepo.migration.FedoraObjectProcessor; import org.fcrepo.migration.ObjectInfo; import org.fcrepo.migration.ObjectProperties; import org.fcrepo.migration.ObjectReference; import org.fcrepo.migration.StreamingFedoraObjectHandler; /** * A FedoraObjectProcessor implementation that uses the STaX API to process * a FOXML XML InputStream. * @author mdurbin */ public class FoxmlInputStreamFedoraObjectProcessor implements FedoraObjectProcessor { private static final String FOXML_NS = "info:fedora/fedora-system:def/foxml#"; private URLFetcher fetcher; private String localFedoraServer; private InternalIDResolver idResolver; private XMLStreamReader reader; private List<File> tempFiles; boolean isFedora2 = false; /** * The basic object information read from the XML stream at construction * time by processing the root XML element and its attributes. */ private ObjectInfo objectInfo; /** * foxml input stream fedora object processor. * @param is the input stream * @param fetcher the fetcher * @param resolver the resolver * @param localFedoraServer the host and port (formatted like "localhost:8080") of the fedora 3 server * from which the content exposed by the "is" parameter comes. * @throws XMLStreamException xml stream exception */ public FoxmlInputStreamFedoraObjectProcessor(final InputStream is, final URLFetcher fetcher, final InternalIDResolver resolver, final String localFedoraServer) throws XMLStreamException { this.fetcher = fetcher; this.idResolver = resolver; this.localFedoraServer = localFedoraServer; final XMLInputFactory factory = XMLInputFactory.newFactory(); reader = factory.createXMLStreamReader(is); reader.nextTag(); final Map<String, String> attributes = getAttributes(reader, "PID", "VERSION", "FEDORA_URI", "schemaLocation"); if (attributes.get("VERSION") == null || !attributes.get("VERSION").equals("1.1")) { isFedora2 = true; } objectInfo = new DefaultObjectInfo(attributes.get("PID"), attributes.get("FEDORA_URI")); while (reader.next() == XMLStreamConstants.CHARACTERS) { } tempFiles = new ArrayList<File>(); } @Override public ObjectInfo getObjectInfo() { return objectInfo; } @Override public void processObject(final StreamingFedoraObjectHandler handler) { handler.beginObject(objectInfo); Foxml11DatastreamInfo dsInfo = null; try { handler.processObjectProperties(readProperties()); while (reader.hasNext()) { if (reader.isCharacters()) { if (!reader.isWhiteSpace()) { throw new RuntimeException("Unexpected character data! \"" + reader.getText() + "\""); } else { // skip whitespace... } } else if (reader.isStartElement()) { if (reader.getLocalName().equals("datastream") && reader.getNamespaceURI().equals(FOXML_NS)) { dsInfo = new Foxml11DatastreamInfo(objectInfo, reader); } else if (reader.getLocalName().equals("datastreamVersion")) { final DatastreamVersion v = new Foxml11DatastreamVersion(dsInfo, reader); handler.processDatastreamVersion(v); } else if (reader.getLocalName().equals("disseminator") && isFedora2) { readUntilClosed("disseminator", FOXML_NS); handler.processDisseminator(); } else { throw new RuntimeException("Unexpected element! \"" + reader.getLocalName() + "\"!"); } } else if (reader.isEndElement() && (dsInfo != null && reader.getLocalName().equals("datastream"))) { dsInfo = null; } else if (reader.isEndElement() && reader.getLocalName().equals("digitalObject")) { // end of document.... handler.completeObject(objectInfo); cleanUpTempFiles(); } else { throw new RuntimeException("Unexpected xml structure! \"" + reader.getEventType() + "\" at line " + reader.getLocation().getLineNumber() + ", column " + reader.getLocation().getColumnNumber() + "!" + (reader.isCharacters() ? " \"" + reader.getText() + "\"" : "")); } reader.next(); } } catch (XMLStreamException | JAXBException e) { handler.abortObject(objectInfo); cleanUpTempFiles(); throw new RuntimeException(e); } finally { try { reader.close(); } catch (final XMLStreamException e) { throw new RuntimeException(e); } } } private void cleanUpTempFiles() { for (final File f : this.tempFiles) { f.delete(); } } private ObjectProperties readProperties() throws JAXBException, XMLStreamException { final JAXBContext jc = JAXBContext.newInstance(FoxmlObjectProperties.class); final Unmarshaller unmarshaller = jc.createUnmarshaller(); final JAXBElement<FoxmlObjectProperties> p = unmarshaller.unmarshal(reader, FoxmlObjectProperties.class); final FoxmlObjectProperties properties = p.getValue(); if (isFedora2) { // Fedora 2 uses the rdf:type property with a literal value to differentiate between // objects, behavior mechanism objects and behavior definition objects. That literal // cannot be retained as an rdf type in fedora4, nor can we use the generic mapping // to map it, so we convert it to a dcterms:type right here. for (FoxmlObjectProperty prop : properties.properties) { if (prop.getName().equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) { prop.name = "http://purl.org/dc/terms/type"; } } } return properties; } private void readUntilClosed(final String name, final String namespace) throws XMLStreamException { while (reader.hasNext()) { if (reader.isEndElement() && reader.getLocalName().equals(name) && reader.getNamespaceURI().equals(namespace)) { return; } else { // skip all other stuff.... } reader.next(); } } private class Foxml11DatastreamInfo implements DatastreamInfo { private String id; private String controlGroup; private String fedoraUri; private String state; private boolean versionable; private ObjectInfo objectInfo; public Foxml11DatastreamInfo(final ObjectInfo objectInfo, final XMLStreamReader reader) { this.objectInfo = objectInfo; final Map<String, String> attributes = getAttributes(reader, "ID", "CONTROL_GROUP", "FEDORA_URI", "STATE", "VERSIONABLE"); id = attributes.get("ID"); controlGroup = attributes.get("CONTROL_GROUP"); fedoraUri = attributes.get("FEDORA_URI"); state = attributes.get("STATE"); versionable = Boolean.valueOf(attributes.get("VERSIONABLE")); } @Override public ObjectInfo getObjectInfo() { return objectInfo; } @Override public String getDatastreamId() { return id; } @Override public String getControlGroup() { return controlGroup; } @Override public String getFedoraURI() { return fedoraUri; } @Override public String getState() { return state; } @Override public boolean getVersionable() { return versionable; } } public class Foxml11DatastreamVersion implements DatastreamVersion { private DatastreamInfo dsInfo; private String id; private String label; private String created; private String mimeType; private String altIds; private String formatUri; private long size; private ContentDigest contentDigest; private CachedContent dsContent; /** * foxml datastream version. * @param dsInfo the datastream information * @param reader the reader * @throws XMLStreamException xml stream exception */ public Foxml11DatastreamVersion(final DatastreamInfo dsInfo, final XMLStreamReader reader) throws XMLStreamException { this.dsInfo = dsInfo; final Map<String, String> dsAttributes = getAttributes(reader, "ID", "LABEL", "CREATED", "MIMETYPE", "ALT_IDS", "FORMAT_URI", "SIZE"); id = dsAttributes.get("ID"); label = dsAttributes.get("LABEL"); created = dsAttributes.get("CREATED"); mimeType = dsAttributes.get("MIMETYPE"); altIds = dsAttributes.get("ALT_IDS"); formatUri = dsAttributes.get("FORMAT_URI"); size = dsAttributes.containsKey("SIZE") ? Long.parseLong(dsAttributes.get("SIZE")) : -1; reader.next(); while (reader.hasNext()) { if (reader.isCharacters()) { if (!reader.isWhiteSpace()) { throw new RuntimeException("Unexpected character data! \"" + reader.getText() + "\""); } else { // skip whitespace... } } else if (reader.isStartElement()) { final String localName = reader.getLocalName(); if (localName.equals("contentDigest")) { final Map<String, String> attributes = getAttributes(reader, "TYPE", "DIGEST"); this.contentDigest = new DefaultContentDigest(attributes.get("TYPE"), attributes.get("DIGEST")); } else if (localName.equals("xmlContent")) { // this XML fragment may not be valid out of context // context, so write it out as a complete XML // file... reader.next(); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final XMLEventReader eventReader = XMLInputFactory.newFactory() .createXMLEventReader(reader); final XMLEventWriter eventWriter = XMLOutputFactory.newFactory().createXMLEventWriter(baos); while (eventReader.hasNext()) { final XMLEvent event = eventReader.nextEvent(); if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals("xmlContent") && event.asEndElement().getName().getNamespaceURI().equals(FOXML_NS)) { eventWriter.close(); break; } else { eventWriter.add(event); } } try { dsContent = new MemoryCachedContent(new String(baos.toByteArray(), "UTF-8")); } catch (final UnsupportedEncodingException e) { throw new RuntimeException(e); } } else if (localName.equals("contentLocation")) { final Map<String, String> attributes = getAttributes(reader, "REF", "TYPE"); if (attributes.get("TYPE").equals("INTERNAL_ID")) { dsContent = idResolver.resolveInternalID(attributes.get("REF")); } else { try { String ref = attributes.get("REF"); if (ref.contains("local.fedora.server")) { ref = ref.replace("local.fedora.server", localFedoraServer); } dsContent = new URLCachedContent(new URL(ref), fetcher); } catch (final MalformedURLException e) { throw new RuntimeException(e); } } } else if (localName.equals("binaryContent")) { try { final File f = File.createTempFile("decoded", "file"); tempFiles.add(f); final Base64OutputStream out = new Base64OutputStream(new FileOutputStream(f), false); while (reader.next() == XMLStreamConstants.CHARACTERS) { out.write(reader.getText().getBytes("UTF-8")); } out.flush(); out.close(); dsContent = new FileCachedContent(f); } catch (final IOException e) { throw new RuntimeException(e); } readUntilClosed("binaryContent", FOXML_NS); } else { throw new RuntimeException("Unexpected element! \"" + reader.getLocalName() + "\"!"); } } else if (reader.isEndElement()) { if (reader.getLocalName().equals("datastreamVersion")) { return; } } else { throw new RuntimeException("Unexpected xml structure! \"" + reader.getEventType() + "\" at line " + reader.getLocation().getLineNumber() + ", column " + reader.getLocation().getColumnNumber() + "!" + (reader.isCharacters() ? " \"" + reader.getText() + "\"" : "")); } reader.next(); } } @Override public DatastreamInfo getDatastreamInfo() { return dsInfo; } @Override public String getVersionId() { return id; } @Override public String getMimeType() { return mimeType; } @Override public String getLabel() { return label; } @Override public String getCreated() { return created; } @Override public String getAltIds() { return altIds; } @Override public String getFormatUri() { return formatUri; } @Override public long getSize() { return size; } @Override public ContentDigest getContentDigest() { return contentDigest; } @Override public InputStream getContent() throws IOException { return dsContent.getInputStream(); } @Override public String getExternalOrRedirectURL() { if (dsContent instanceof URLCachedContent) { return ((URLCachedContent) dsContent).getURL().toString(); } else { throw new IllegalStateException(); } } @Override public boolean isFirstVersionIn(final ObjectReference obj) { final List<DatastreamVersion> datastreams = obj .getDatastreamVersions(getDatastreamInfo().getDatastreamId()); return datastreams.indexOf(this) == 0; } @Override public boolean isLastVersionIn(final ObjectReference obj) { final List<DatastreamVersion> datastreams = obj .getDatastreamVersions(getDatastreamInfo().getDatastreamId()); return datastreams.indexOf(this) == datastreams.size() - 1; } } private static Map<String, String> getAttributes(final XMLStreamReader r, final String... allowedNames) { final HashMap<String, String> result = new HashMap<String, String>(); final Set<String> allowed = new HashSet<String>(Arrays.asList(allowedNames)); for (int i = 0; i < r.getAttributeCount(); i++) { final String localName = r.getAttributeLocalName(i); final String value = r.getAttributeValue(i); if (allowed.contains(localName)) { result.put(localName, value); } else { System.err.println("Unexpected attribute: " + localName + " = \"" + value + "\""); } } return result; } }