cz.cas.lib.proarc.common.export.archive.PackageBuilder.java Source code

Java tutorial

Introduction

Here is the source code for cz.cas.lib.proarc.common.export.archive.PackageBuilder.java

Source

/*
 * Copyright (C) 2015 Jan Pokorsky
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package cz.cas.lib.proarc.common.export.archive;

import com.yourmediashelf.fedora.generated.foxml.DatastreamType;
import com.yourmediashelf.fedora.generated.foxml.DatastreamVersionType;
import com.yourmediashelf.fedora.generated.foxml.DigitalObject;
import com.yourmediashelf.fedora.generated.foxml.PropertyType;
import cz.cas.lib.proarc.common.device.DeviceRepository;
import cz.cas.lib.proarc.common.export.mets.FileMD5Info;
import cz.cas.lib.proarc.common.export.mets.MetsExportException;
import cz.cas.lib.proarc.common.export.mets.MetsUtils;
import cz.cas.lib.proarc.common.fedora.DigitalObjectException;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils.ControlGroup;
import cz.cas.lib.proarc.common.fedora.LocalStorage.LocalObject;
import cz.cas.lib.proarc.common.object.DigitalObjectElement;
import cz.cas.lib.proarc.common.object.DisseminationHandler;
import cz.cas.lib.proarc.mets.DivType;
import cz.cas.lib.proarc.mets.DivType.Fptr;
import cz.cas.lib.proarc.mets.FileType;
import cz.cas.lib.proarc.mets.FileType.FLocat;
import cz.cas.lib.proarc.mets.MdSecType;
import cz.cas.lib.proarc.mets.MdSecType.MdWrap;
import cz.cas.lib.proarc.mets.MdSecType.MdWrap.XmlData;
import cz.cas.lib.proarc.mets.Mets;
import cz.cas.lib.proarc.mets.MetsType.FileSec;
import cz.cas.lib.proarc.mets.MetsType.FileSec.FileGrp;
import cz.cas.lib.proarc.mets.MetsType.MetsHdr;
import cz.cas.lib.proarc.mets.MetsType.MetsHdr.Agent;
import cz.cas.lib.proarc.mets.StructMapType;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import javax.ws.rs.core.Response;
import javax.xml.bind.JAXB;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.IOUtils;

/**
 * Builds resulting METS package and a corresponding folder layout.
 *
 * @author Jan Pokorsky
 */
public class PackageBuilder {

    /**
     * Metadata types accepted by {@link MdWrap#setMDTYPE(java.lang.String) }.
     * The name of the constant is passed as the {@code MDTYPE} value.
     * @see <a href='http://www.loc.gov/standards/mets/docs/mets.v1-9.html#mdWrap'>mdWrap</a>
     */
    public enum MdType {
        DC,
        MODS
    }

    /** The name of the METS file written to the package folder. */
    public static final String METS_FILENAME = "mets.xml";
    /** The type of the structural map of other objects like devices. */
    public static final String STRUCTMAP_OTHERS_TYPE = "OTHERS";
    /** The type of the structural map of digital objects. */
    public static final String STRUCTMAP_PHYSICAL_TYPE = "PHYSICAL";
    /** The ID of the {@code div} containing a list of devices. */
    public static final String DIV_DEVICE_LIST_ID = "DIV_DEVICES";

    /** The folder of the current package; it is created by {@code prepare}. */
    private File pkgFolder;
    /** The package folder as URI; file locations in METS are made relative to it. */
    private URI pkgFolderUri;
    /** The folder where the package folder is created. */
    private final File parentFolder;
    /** The factory of XML date values. */
    private final DatatypeFactory xmlTypes;
    /** The METS document under construction; it is created by {@code prepare}. */
    private Mets mets;
    /** The structural map of devices; it is created with the first added device. */
    private StructMapType othersStructMap;
    /** The indenting transformer used to write inline XML content to files. */
    private final Transformer domTransformer;
    /** Maps the PID of an added object or device to its {@code div}. */
    private final HashMap<String, DivType> pid2PhysicalDiv;

    public PackageBuilder(File targetFolder) {
        this.parentFolder = targetFolder;
        this.pid2PhysicalDiv = new HashMap<String, DivType>();
        try {
            this.xmlTypes = DatatypeFactory.newInstance();
            this.domTransformer = TransformerFactory.newInstance().newTransformer();
            this.domTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
            this.domTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
        } catch (DatatypeConfigurationException ex) {
            throw new IllegalStateException(ex);
        } catch (TransformerConfigurationException ex) {
            throw new IllegalStateException(ex);
        }
    }

    /**
     * Starts a new package. It creates the package folder and a new METS
     * document with the header and an empty file section.
     * <p>The first item of the path determines the name of the package folder
     * and the type of the METS document. Labels of all items make the METS label.
     *
     * @param objectPath the path of objects; it must contain at least one item
     * @param lobj the object whose last modification date is used
     *          as the create date of the METS header
     * @throws IllegalStateException if the package folder already exists
     *          or if it cannot be created
     */
    public void prepare(List<DigitalObjectElement> objectPath, LocalObject lobj) {
        DigitalObjectElement entry = objectPath.get(0);

        // create package folder
        pkgFolder = new File(parentFolder, FoxmlUtils.pidAsUuid(entry.getPid()));
        if (!pkgFolder.mkdir()) {
            // mkdir fails not only for an existing folder but also
            // for a missing parent folder or missing permissions
            if (pkgFolder.exists()) {
                throw new IllegalStateException("The package folder already exists: " + pkgFolder);
            }
            throw new IllegalStateException("Cannot create the package folder: " + pkgFolder);
        }
        pkgFolderUri = pkgFolder.toURI();

        DigitalObject digitalObject = lobj.getDigitalObject();
        MetsHdr metsHdr = new MetsHdr();
        // XXX should the current date be used instead?
        // For now the last modification date is used as the create date
        // to decide later whether fedora contains the same or an updated object.
        metsHdr.setCREATEDATE(getXmlDate(digitalObject, FoxmlUtils.PROPERTY_LASTMODIFIED));
        Agent agent = new Agent();
        agent.setName("ProArc");
        agent.setROLE("CREATOR");
        agent.setTYPE("OTHER");
        metsHdr.getAgent().add(agent);

        mets = new Mets();
        mets.setLabel1(getPackageLabel(objectPath));
        mets.setMetsHdr(metsHdr);
        mets.setTYPE(entry.getModelId());

        mets.setFileSec(new FileSec());
    }

    /**
     * Writes the METS document as the {@link #METS_FILENAME} file
     * to the package folder.
     */
    public void build() {
        File metsFile = new File(pkgFolder, METS_FILENAME);
        JAXB.marshal(mets, metsFile);
    }

    /**
     * Adds a {@code div} describing the digital object to the package.
     * <p>The div is added as a child of the parent's div. If there is no parent
     * or the parent has not been added yet, the div becomes the root
     * of a new structural map of the {@link #STRUCTMAP_PHYSICAL_TYPE} type.
     *
     * @param index the index of the object; it is part of the div ID
     * @param elm the object to add
     * @param parentElm the parent of the object or {@code null}
     * @return the new div
     */
    public DivType addObject(int index, DigitalObjectElement elm, DigitalObjectElement parentElm) {
        DivType div = new DivType();
        String modelId = elm.getModelId();
        String type = getObjectId(modelId);
        // Locale.ROOT: the ID must not contain localized digits of the default locale
        div.setID(String.format(Locale.ROOT, "div_%s_%04d", type, index));
        div.getCONTENTIDS().add(elm.getPid());
        div.setLabel3(elm.getItem().getLabel());
        div.setORDER(null);
        div.setTYPE(modelId);

        DivType parent = parentElm == null ? null : pid2PhysicalDiv.get(parentElm.getPid());
        if (parent == null) {
            StructMapType structMap = new StructMapType();
            structMap.setDiv(div);
            structMap.setTYPE(STRUCTMAP_PHYSICAL_TYPE);
            structMap.setLabel2("Physical Structure");
            mets.getStructMap().add(structMap);
        } else {
            parent.getDiv().add(div);
        }
        // remember the div to attach children, metadata and files later
        pid2PhysicalDiv.put(elm.getPid(), div);
        return div;
    }

    /**
     * Adds a {@code div} describing the device to the package.
     * <p>Devices are listed under the {@link #DIV_DEVICE_LIST_ID} div of
     * the structural map of the {@link #STRUCTMAP_OTHERS_TYPE} type. The map
     * is created with the first device. A device that has been already added
     * is not added again.
     *
     * @param cache the device object
     * @return the div of the device
     */
    public DivType addDevice(LocalObject cache) {
        String pid = cache.getPid();
        DivType div = pid2PhysicalDiv.get(pid);
        if (div != null) {
            // the device is already part of the package
            return div;
        }
        div = new DivType();
        String modelId = DeviceRepository.METAMODEL_ID;
        String type = getObjectId(modelId);
        div.getCONTENTIDS().add(pid);
        div.setLabel3(cache.getLabel());
        div.setORDER(null);
        div.setTYPE(modelId);

        DivType devicesDiv;
        if (othersStructMap == null) {
            othersStructMap = new StructMapType();
            devicesDiv = new DivType();
            devicesDiv.setID(DIV_DEVICE_LIST_ID);
            devicesDiv.setLabel3("List of devices");
            othersStructMap.setDiv(devicesDiv);
            othersStructMap.setTYPE(STRUCTMAP_OTHERS_TYPE);
            othersStructMap.setLabel2("Other objects");
            mets.getStructMap().add(othersStructMap);
        } else {
            devicesDiv = othersStructMap.getDiv();
        }
        // devices are numbered in order of their addition starting with 1;
        // Locale.ROOT: the ID must not contain localized digits of the default locale
        div.setID(String.format(Locale.ROOT, "div_%s_%04d", type, devicesDiv.getDiv().size() + 1));
        devicesDiv.getDiv().add(div);
        pid2PhysicalDiv.put(pid, div);
        return div;
    }

    /**
     * Writes FOXML of the object to the package using the model ID
     * of the passed element.
     *
     * @param index the index of the object
     * @param elm the element providing the model ID
     * @param obj the object to write
     * @throws DigitalObjectException failure
     */
    public void addFoxmlAsFile(int index, DigitalObjectElement elm, LocalObject obj) throws DigitalObjectException {
        String modelId = elm.getModelId();
        addFoxmlAsFile(index, modelId, obj);
    }

    /**
     * Writes FOXML of the object to a file of the {@code FOXML} group folder,
     * registers the file with its MD5 checksum and size in the METS file
     * section and links it from the div of the object.
     *
     * @param index the index of the object; it is part of the file name and the file ID
     * @param modelId the model ID of the object
     * @param obj the object to write; it must have been added with
     *          {@code addObject} or {@code addDevice} before
     * @throws DigitalObjectException if the file cannot be read or digested
     * @throws IllegalStateException if the object has no div in the package
     */
    public void addFoxmlAsFile(int index, String modelId, LocalObject obj) throws DigitalObjectException {
        // check the div first to not leave an orphan file in case of failure
        DivType div = pid2PhysicalDiv.get(obj.getPid());
        if (div == null) {
            throw new IllegalStateException("Missing div of the object: " + obj.getPid());
        }
        try {
            String uuid = getObjectId(obj.getPid());
            String dsId = "FOXML";
            String modelName = getObjectId(modelId);
            File grpFile = getGroupFile(pkgFolder, dsId, getFilename(index, modelName, uuid, "xml"));
            DigitalObject dObj = obj.getDigitalObject();
            FoxmlUtils.marshal(new StreamResult(grpFile), dObj, true);
            // compute the checksum of the written file
            FileMD5Info fileInfo = getDigest(new BufferedInputStream(new FileInputStream(grpFile)));

            FileGrp fileGrp = getMetsFileGrp(dsId);
            FileType fileType = new FileType();
            fileType.setCHECKSUM(fileInfo.getMd5());
            fileType.setCHECKSUMTYPE("MD5");
            fileType.setCREATED(getXmlDate(dObj, FoxmlUtils.PROPERTY_LASTMODIFIED));
            // Locale.ROOT: the ID must not contain localized digits of the default locale
            fileType.setID(String.format(Locale.ROOT, "%s_%s_%04d_%s", dsId, modelName, index, uuid));
            fileType.setMIMETYPE("text/xml");
            fileType.setSIZE(fileInfo.getSize());
            fileType.getFLocat().add(createFLocat(grpFile));
            fileGrp.getFile().add(fileType);

            Fptr fptr = new Fptr();
            fptr.setFILEID(fileType);
            div.getFptr().add(fptr);
        } catch (NoSuchAlgorithmException ex) {
            throw new DigitalObjectException(obj.getPid(), null, ex);
        } catch (IOException ex) {
            throw new DigitalObjectException(obj.getPid(), null, ex);
        }
    }

    /**
     * Embeds XML content of the first version of the datastream as
     * a descriptive metadata section of METS and links the section
     * from the div of the object.
     *
     * @param index the index of the object; it is part of the section ID
     * @param dt the datastream with XML content
     * @param pid the PID of the object; the object must have been added with
     *          {@code addObject} or {@code addDevice} before
     * @param modelId the model ID of the object
     * @param mdType the type of metadata
     * @throws DigitalObjectException declared for callers; not thrown by this implementation
     * @throws IllegalStateException if the object has no div in the package
     */
    public void addStreamAsMdSec(int index, DatastreamType dt, String pid, String modelId, MdType mdType)
            throws DigitalObjectException {
        // check the div first to not add a section that nothing refers to
        DivType div = pid2PhysicalDiv.get(pid);
        if (div == null) {
            throw new IllegalStateException("Missing div of the object: " + pid);
        }
        String uuid = getObjectId(pid);
        String modelName = getObjectId(modelId);
        DatastreamVersionType ds = dt.getDatastreamVersion().get(0);
        String mimetype = ds.getMIMETYPE();

        MdSecType mdSec = new MdSecType();
        mdSec.setCREATED(ds.getCREATED());
        // Locale.ROOT: the ID must not contain localized digits of the default locale
        mdSec.setID(String.format(Locale.ROOT, "DMD_%s_%s_%04d_%s", mdType.name(), modelName, index, uuid));

        MdWrap mdWrap = new MdWrap();
        mdWrap.setMIMETYPE(mimetype);
        mdWrap.setMDTYPE(mdType.name());
        XmlData xmlData = new XmlData();
        xmlData.getAny().addAll(ds.getXmlContent().getAny());
        mdWrap.setXmlData(xmlData);
        mdSec.setMdWrap(mdWrap);
        mets.getDmdSec().add(mdSec);

        div.getDMDID().add(mdSec);
    }

    /**
     * Copies content of the first version of the datastream to a file
     * of the group folder named after the datastream ID, registers the file
     * with its MD5 checksum and size in the METS file section and links it
     * from the div of the object.
     *
     * @param index the index of the object; it is part of the file name and the file ID
     * @param dt the datastream to copy
     * @param pid the PID of the object; the object must have been added with
     *          {@code addObject} or {@code addDevice} before
     * @param modelId the model ID of the object
     * @param dHandler the handler passed on to copy the stream content
     * @throws DigitalObjectException if the content cannot be copied
     * @throws IllegalStateException if the object has no div in the package
     */
    public void addStreamAsFile(int index, DatastreamType dt, String pid, String modelId,
            DisseminationHandler dHandler) throws DigitalObjectException {
        // check the div first to not leave an orphan file in case of failure
        DivType div = pid2PhysicalDiv.get(pid);
        if (div == null) {
            throw new IllegalStateException("Missing div of the object: " + pid);
        }
        String dsId = dt.getID();
        String uuid = getObjectId(pid);
        DatastreamVersionType ds = dt.getDatastreamVersion().get(0);
        String mimetype = ds.getMIMETYPE();
        // NOTE(review): ext is null for a MIME type without mapping and
        // the file name then ends with ".null" - confirm whether it is intended
        String ext = getMimeFileExtension(mimetype);
        String modelName = getObjectId(modelId);
        File dsFile = getGroupFile(pkgFolder, dsId, getFilename(index, modelName, uuid, ext));
        FileMD5Info fileInfo = copyStream(pid, dt, ds, dHandler, dsFile);

        // add to fileGrp
        FileGrp fileGrp = getMetsFileGrp(dsId);
        FileType fileType = new FileType();
        fileType.setCHECKSUM(fileInfo.getMd5());
        fileType.setCHECKSUMTYPE("MD5");
        fileType.setCREATED(ds.getCREATED());
        // Locale.ROOT: the ID must not contain localized digits of the default locale
        fileType.setID(String.format(Locale.ROOT, "%s_%s_%04d_%s", dsId, modelName, index, uuid));
        fileType.setMIMETYPE(mimetype);
        fileType.setSIZE(fileInfo.getSize());
        fileType.getFLocat().add(createFLocat(dsFile));
        fileGrp.getFile().add(fileType);

        Fptr fptr = new Fptr();
        fptr.setFILEID(fileType);
        div.getFptr().add(fptr);
    }

    /**
     * Writes content of the datastream version to the file and computes
     * its digest.
     * <p>The first XML element of an inline stream is serialized with
     * the indenting transformer. Content of other streams is read from
     * the dissemination of the handler.
     *
     * @param pid the PID of the object used to report failures
     * @param dt the datastream providing the ID and the control group
     * @param ds the datastream version to copy
     * @param dHandler the handler to read not inline content
     * @param dsFile the target file
     * @return the digest and the size of the written file
     * @throws DigitalObjectException if the content cannot be written or
     *          the dissemination does not provide an input stream
     */
    private FileMD5Info copyStream(String pid, DatastreamType dt, DatastreamVersionType ds,
            DisseminationHandler dHandler, File dsFile) throws DigitalObjectException {
        String dsId = dt.getID();
        ControlGroup ctrlGroup = ControlGroup.fromExternal(dt.getCONTROLGROUP());
        FileMD5Info fileInfo;
        try {
            if (ctrlGroup == ControlGroup.INLINE) {
                DOMSource domSource = new DOMSource(ds.getXmlContent().getAny().get(0));
                domTransformer.transform(domSource, new StreamResult(dsFile));
                // digest the file as the indentation changes the original content
                fileInfo = getDigest(new BufferedInputStream(new FileInputStream(dsFile)));
            } else {
                Response resp = dHandler.getDissemination(null);
                Object entity = resp.getEntity();
                if (!(entity instanceof InputStream)) {
                    String msg = "Unsupported entity " + (entity == null ? null : entity.getClass().getName());
                    throw new DigitalObjectException(pid, null, dsId, msg, null);
                }
                InputStream in = (InputStream) entity;
                FileOutputStream out = null;
                try {
                    out = new FileOutputStream(dsFile);
                    fileInfo = MetsUtils.getDigestAndCopy(in, out);
                    // close explicitly to report a failure of a successful copy
                    out.close();
                    out = null;
                } finally {
                    // NOTE(review): it is not visible here whether getDigestAndCopy
                    // closes the streams; closing them again is harmless
                    IOUtils.closeQuietly(out);
                    IOUtils.closeQuietly(in);
                }
            }
            return fileInfo;
        } catch (TransformerException ex) {
            throw new DigitalObjectException(pid, null, dsId, null, ex);
        } catch (NoSuchAlgorithmException ex) {
            throw new DigitalObjectException(pid, null, dsId, null, ex);
        } catch (IOException ex) {
            throw new DigitalObjectException(pid, null, dsId, null, ex);
        }
    }

    /**
     * Creates the METS location of the file. The location is an URL
     * relative to the package folder and it is prefixed with {@code ./}.
     *
     * @param dsFile the file to locate
     * @return the location
     */
    private FLocat createFLocat(File dsFile) {
        FLocat location = new FLocat();
        location.setLOCTYPE("URL");
        URI relativeUri = pkgFolderUri.relativize(dsFile.toURI());
        String href = "./" + relativeUri.toASCIIString();
        location.setHref(href);
        return location;
    }

    /**
     * Finds the file group of the METS file section with the given ID.
     * A missing group is created and added to the section.
     *
     * @param dsId the ID of the group
     * @return the existing or the new group
     */
    private FileGrp getMetsFileGrp(String dsId) {
        List<FileGrp> groups = mets.getFileSec().getFileGrp();
        for (int i = 0, count = groups.size(); i < count; i++) {
            FileGrp candidate = groups.get(i);
            if (dsId.equals(candidate.getID())) {
                return candidate;
            }
        }
        // not found; register a new group
        FileGrp created = new FileGrp();
        created.setID(dsId);
        groups.add(created);
        return created;
    }

    /**
     * Gets a new file inside the group folder. The group folder is created
     * if necessary. The file itself is not created.
     *
     * @param parent the parent of the group folder
     * @param grpId the name of the group folder
     * @param filename the name of the file
     * @return the file that does not exist yet
     * @throws IllegalStateException if the group folder cannot be created
     *          or if the file already exists
     */
    private File getGroupFile(File parent, String grpId, String filename) {
        File dsFolder = new File(parent, grpId);
        // mkdirs returns false also for an existing folder
        if (!dsFolder.mkdirs() && !dsFolder.isDirectory()) {
            throw new IllegalStateException("Cannot create folder: " + dsFolder);
        }
        File dsFile = new File(dsFolder, filename);
        if (dsFile.exists()) {
            throw new IllegalStateException("File exists: " + dsFile);
        }
        return dsFile;
    }

    /**
     * Builds the name of a package file as {@code model_index_name.ext}.
     * The index is padded with zeros to at least 4 digits.
     *
     * @param index the index of the object
     * @param model the name of the model
     * @param name the name of the object
     * @param ext the file extension without the dot
     * @return the file name
     */
    static String getFilename(int index, String model, String name, String ext) {
        // Locale.ROOT: file names must not contain localized digits of the default locale
        return String.format(Locale.ROOT, "%s_%04d_%s.%s", model, index, name, ext);
    }

    /**
     * Gets the file extension registered for the MIME type.
     *
     * @param mime the MIME type
     * @return the extension or {@code null} if the mapping has no such MIME type
     * @throws IllegalStateException if the mapping of MIME types cannot be read
     */
    private static String getMimeFileExtension(String mime) {
        try {
            return MetsUtils.getMimeToExtension().getProperty(mime);
        } catch (MetsExportException ex) {
            // the exception need not have a cause; fall back to its own message
            Throwable cause = ex.getCause();
            String msg = cause != null ? cause.getMessage() : ex.getMessage();
            throw new IllegalStateException(msg, ex);
        }
    }

    /**
     * Strips the prefix from the identifier. The prefix ends with
     * the first colon. An identifier without a colon is returned as it is.
     *
     * @param pid the identifier, e.g. {@code prefix:id}
     * @return the identifier without the prefix
     */
    static String getObjectId(String pid) {
        int colon = pid.indexOf(':');
        if (colon < 0) {
            return pid;
        }
        return pid.substring(colon + 1);
    }

    /**
     * Reads the object property as an XML date.
     *
     * @param dobj the object to search
     * @param name the name of the property
     * @return the parsed value of the property, or a calendar created
     *          without any value if the property or its value is missing
     */
    private XMLGregorianCalendar getXmlDate(DigitalObject dobj, String name) {
        PropertyType property = FoxmlUtils.findProperty(dobj, name);
        String value = property == null ? null : property.getVALUE();
        if (value == null || value.isEmpty()) {
            return xmlTypes.newXMLGregorianCalendar();
        }
        return xmlTypes.newXMLGregorianCalendar(value);
    }

    /**
     * Joins labels of path items to a single label. The {@code ", "}
     * separator is put before a label unless nothing has been written yet.
     *
     * @param objectPath the path of objects
     * @return the label of the package
     */
    private static String getPackageLabel(List<DigitalObjectElement> objectPath) {
        StringBuilder label = new StringBuilder();
        for (DigitalObjectElement elm : objectPath) {
            boolean empty = label.length() == 0;
            if (!empty) {
                label.append(", ");
            }
            label.append(elm.getItem().getLabel());
        }
        return label.toString();
    }

    /**
     * Computes the digest of the stream and closes the stream.
     * A failure of closing is reported only if the digest was computed;
     * otherwise the stream is closed quietly to keep the original failure.
     *
     * @param is the stream to read and close
     * @return the digest info
     * @throws NoSuchAlgorithmException propagated from the digest computation
     * @throws IOException if the stream cannot be read or closed
     */
    private static FileMD5Info getDigest(InputStream is) throws NoSuchAlgorithmException, IOException {
        boolean closed = false;
        try {
            FileMD5Info digest = MetsUtils.getDigest(is);
            is.close();
            closed = true;
            return digest;
        } finally {
            if (!closed) {
                IOUtils.closeQuietly(is);
            }
        }
    }

}