org.dspace.services.impl.storage.DSpaceStorageService.java Source code

Java tutorial

Introduction

Here is the source code for org.dspace.services.impl.storage.DSpaceStorageService.java

Source

/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */

package org.dspace.services.impl.storage;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.dspace.orm.dao.api.IBitstreamDao;
import org.dspace.orm.dao.api.IBundleDao;
import org.dspace.orm.entity.Bitstream;
import org.dspace.orm.entity.Bundle;
import org.dspace.services.api.configuration.ConfigurationService;
import org.dspace.services.api.configuration.reference.Module;
import org.dspace.services.api.configuration.reference.PropertyReference;
import org.dspace.services.api.storage.StorageException;
import org.dspace.services.api.storage.StorageService;
import org.dspace.util.Utils;
import org.springframework.beans.factory.annotation.Autowired;

import edu.sdsc.grid.io.FileFactory;
import edu.sdsc.grid.io.GeneralFile;
import edu.sdsc.grid.io.GeneralFileOutputStream;
import edu.sdsc.grid.io.local.LocalFile;
import edu.sdsc.grid.io.srb.SRBAccount;
import edu.sdsc.grid.io.srb.SRBFile;
import edu.sdsc.grid.io.srb.SRBFileSystem;

/**
 * {@link StorageService} implementation that keeps bitstream content in the
 * assetstores declared in the storage module configuration. An assetstore is
 * either a local directory ({@code assetstore.dir*}) or an SRB location
 * ({@code srb.*}); the list of assetstores is read from configuration the
 * first time a public method of this service is called.
 * 
 * @author Jo&atilde;o Melo &lt;jmelo@lyncode.com&gt;
 */
public class DSpaceStorageService implements StorageService {
    private static final Logger log = LogManager.getLogger(DSpaceStorageService.class);
    // These settings control the way an identifier is hashed into
    // directory and file names
    //
    // With digitsPerLevel 2 and directoryLevels 3, an identifier
    // like 12345678901234567890 turns into the relative name
    // /12/34/56/12345678901234567890.
    //
    // You should not change these settings if you have data in the
    // asset store, as the BitstreamStorageManager will be unable
    // to find your existing data.
    private static final int digitsPerLevel = 2;
    private static final int directoryLevels = 3;

    /** Digest algorithm used for every checksum computed by this service. */
    private static final String CHECKSUM_ALGORITHM = "MD5";

    @Autowired
    ConfigurationService config;
    @Autowired
    IBitstreamDao bitstreamDao;
    @Autowired
    IBundleDao bundleDao;

    /** Configured assetstores, indexed by store number; null until {@link #init()} has run. */
    private List<GeneralFile> assetstores;
    /** Store number that newly stored bitstreams are written to. */
    private int incoming;

    /**
     * Initializes some required objects (assetstores &amp; incoming) from
     * configuration. Does nothing when the assetstores were already loaded.
     */
    private void init() {
        if (assetstores != null) {
            return;
        }

        // Build the list locally and publish it only once it is complete, so
        // the field never refers to a half-populated list.
        List<GeneralFile> stores = new ArrayList<GeneralFile>();

        GeneralFile store = readAssetstore("");
        if (store == null) {
            log.error("No default assetstore");
        } else {
            stores.add(store);
        }

        // NOTE(review): numbered assetstores are looked up starting at suffix
        // ".0" and the scan stops at the first missing one, so the store with
        // suffix ".0" ends up at list index 1 when a default store exists.
        // Confirm this matches the store numbers persisted on bitstreams.
        for (int i = 0;; i++) {
            store = readAssetstore("." + i);
            if (store == null) {
                break;
            }
            stores.add(store);
        }

        // The default value is 0
        incoming = config.getProperty(PropertyReference.key(Module.STORAGE, "assetstore.incoming"),
                Integer.class, 0);
        assetstores = stores;
    }

    /**
     * Reads a single property of the storage module.
     * 
     * @param name
     *            property name, including any assetstore suffix
     * @return whatever the configuration service returns for that property
     */
    private String storageProperty(String name) {
        String value = config.getProperty(PropertyReference.key(Module.STORAGE, name));
        return value;
    }

    /**
     * Reads one assetstore from configuration. A configured
     * {@code assetstore.dir} takes precedence; otherwise the {@code srb.*}
     * properties are used when {@code srb.host} is defined.
     * 
     * @param n
     *            suffix appended to every property name ("" for the default
     *            assetstore, ".0", ".1", ... for the numbered ones)
     * @return a LocalFile or SRBFile for the assetstore root, or null when
     *         neither kind of assetstore is configured for this suffix
     */
    private GeneralFile readAssetstore(String n) {
        String localDir = storageProperty("assetstore.dir" + n);
        if (localDir != null) {
            return new LocalFile(localDir);
        }

        String srbHost = storageProperty("srb.host" + n);
        if (srbHost == null) {
            return null;
        }

        String srbPort = storageProperty("srb.port" + n);
        String srbUsername = storageProperty("srb.username" + n);
        String srbPassword = storageProperty("srb.password" + n);
        String srbHome = storageProperty("srb.homedirectory" + n);
        String srbDomain = storageProperty("srb.mdasdomainname" + n);
        String srbResource = storageProperty("srb.defaultstorageresource" + n);
        String srbZone = storageProperty("srb.mcatzone" + n);

        int port;
        try {
            port = Integer.parseInt(srbPort);
        } catch (NumberFormatException e) {
            // Same exception as before, but now the log says which setting is broken.
            log.error("srb.port is missing or not a number for assetstore " + n, e);
            throw e;
        }

        SRBAccount account = new SRBAccount(srbHost, port, srbUsername, srbPassword, srbHome, srbDomain,
                srbResource, srbZone);
        SRBFileSystem srbFileSystem = null;
        try {
            srbFileSystem = new SRBFileSystem(account);
        } catch (NullPointerException e) {
            log.error("No SRBAccount for assetstore " + n, e);
        } catch (IOException e) {
            log.error("Problem getting SRBFileSystem for assetstore" + n, e);
        }
        if (srbFileSystem == null) {
            log.error("SRB FileSystem is null for assetstore " + n);
        }

        String srbParentDir = storageProperty("srb.parentdir" + n);
        if (srbParentDir == null) {
            log.error("srb.parentdir is undefined for assetstore " + n);
        }

        return new SRBFile(srbFileSystem, srbParentDir);
    }

    /**
     * Return the intermediate path derived from the internal_id. This method
     * splits the id into groups which become subdirectories.
     * 
     * @param iInternalId
     *            The internal_id; must be at least
     *            {@code digitsPerLevel * directoryLevels} characters long
     * @return The path based on the id, without a leading separator and with
     *         one trailing separator
     */
    private String getIntermediatePath(String iInternalId) {
        StringBuilder path = new StringBuilder();
        for (int level = 0; level < directoryLevels; level++) {
            int start = level * digitsPerLevel;
            if (level > 0) {
                path.append(File.separator);
            }
            path.append(iInternalId.substring(start, start + digitsPerLevel));
        }
        path.append(File.separator);
        return path.toString();
    }

    /**
     * Resolves the file that holds (or will hold) the content of a bitstream.
     * Must only be called after {@link #init()}.
     * 
     * @param bitstream
     *            the bitstream to locate; may be null
     * @return a LocalFile or SRBFile inside the bitstream's assetstore, or
     *         null when {@code bitstream} is null
     * @throws StorageException
     *             when the store number does not match a configured
     *             assetstore, the internal id is missing or too short, the
     *             assetstore path cannot be resolved, or the assetstore is of
     *             an unknown type
     */
    private GeneralFile getFile(Bitstream bitstream) throws StorageException {
        // Check that bitstream is not null
        if (bitstream == null) {
            return null;
        }

        // Get the store to use
        int storeNumber = bitstream.getStoreNumber();

        // Default to zero ('assetstore.dir') for backwards compatibility
        if (storeNumber == -1) {
            storeNumber = 0;
        }

        if (storeNumber < 0 || storeNumber >= assetstores.size()) {
            throw new StorageException("No assetstore configured for store number " + storeNumber);
        }
        GeneralFile assetstore = assetstores.get(storeNumber);

        // turn the internal_id into a file path relative to the assetstore
        // directory
        String sInternalId = bitstream.getInternalId();
        if (sInternalId == null) {
            throw new StorageException("Bitstream has no internal id");
        }

        // there are 4 cases:
        // -conventional bitstream, conventional storage
        // -conventional bitstream, srb storage
        // -registered bitstream, conventional storage
        // -registered bitstream, srb storage
        // conventional bitstream - dspace ingested, dspace random name/path
        // registered bitstream - registered to dspace, any name/path
        String sIntermediatePath;
        if (bitstream.isRegistered()) {
            sInternalId = sInternalId.substring(Bitstream.REGISTERED_FLAG.length());
            sIntermediatePath = "";
        } else {
            // Sanity Check: If the internal ID contains a
            // pathname separator, it's probably an attempt to
            // make a path traversal attack, so ignore the path
            // prefix. The internal-ID is supposed to be just a
            // filename, so this will not affect normal operation.
            // Both '/' and '\' are stripped regardless of platform, since
            // '/' is also accepted as a separator where File.separator is '\'.
            int lastSeparator = Math.max(sInternalId.lastIndexOf('/'), sInternalId.lastIndexOf('\\'));
            lastSeparator = Math.max(lastSeparator, sInternalId.lastIndexOf(File.separatorChar));
            if (lastSeparator != -1) {
                sInternalId = sInternalId.substring(lastSeparator + 1);
            }
            if (sInternalId.length() < digitsPerLevel * directoryLevels) {
                throw new StorageException(
                        "Internal id '" + sInternalId + "' is too short to derive a storage path");
            }
            sIntermediatePath = this.getIntermediatePath(sInternalId);
        }

        StringBuilder bufFilename = new StringBuilder();
        if (assetstore instanceof LocalFile) {
            try {
                bufFilename.append(((LocalFile) assetstore).getCanonicalPath());
            } catch (IOException e) {
                throw new StorageException(e);
            }
            bufFilename.append(File.separator);
            bufFilename.append(sIntermediatePath);
            bufFilename.append(sInternalId);
            if (log.isDebugEnabled()) {
                log.debug("Local filename for " + sInternalId + " is " + bufFilename.toString());
            }
            return new LocalFile(bufFilename.toString());
        }
        if (assetstore instanceof SRBFile) {
            bufFilename.append(sIntermediatePath);
            bufFilename.append(sInternalId);
            if (log.isDebugEnabled()) {
                log.debug("SRB filename for " + sInternalId + " is " + ((SRBFile) assetstore).toString()
                        + bufFilename.toString());
            }
            return new SRBFile((SRBFile) assetstore, bufFilename.toString());
        }

        throw new StorageException("Unknown assetstore type");
    }

    /**
     * Creates the digest used for bitstream checksums.
     * 
     * @return a new MD5 message digest
     * @throws StorageException
     *             when the algorithm is not available
     */
    private MessageDigest newChecksumDigest() throws StorageException {
        try {
            return MessageDigest.getInstance(CHECKSUM_ALGORITHM);
        } catch (NoSuchAlgorithmException e) {
            log.error("Caught NoSuchAlgorithmException", e);
            throw new StorageException("Invalid checksum algorithm", e);
        }
    }

    /**
     * Reads a file to its end and returns the checksum of its content. The
     * stream opened on the file is always closed before returning.
     * 
     * @param file
     *            the file to read
     * @return the raw digest bytes
     * @throws StorageException
     *             when the digest algorithm is unavailable or the file cannot
     *             be opened or read
     */
    private byte[] digestContent(GeneralFile file) throws StorageException {
        // Obtain the digest first so that no stream is left open if it fails.
        MessageDigest digest = newChecksumDigest();

        InputStream content;
        try {
            content = FileFactory.newFileInputStream(file);
        } catch (IOException e) {
            log.error("File: " + file.getAbsolutePath() + " to be registered cannot be opened - is it "
                    + "really there?");
            throw new StorageException(e);
        }

        DigestInputStream dis = new DigestInputStream(content, digest);
        try {
            final byte[] buffer = new byte[1024 * 4];
            while (dis.read(buffer, 0, buffer.length) != -1) {
                // nothing to do: reading feeds the bytes into the digest
            }
            return digest.digest();
        } catch (IOException e) {
            throw new StorageException(e);
        } finally {
            IOUtils.closeQuietly(dis);
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * org.dspace.services.StorageService#retrieve(org.dspace.orm.entity.Bitstream
     * )
     */
    /**
     * Opens the stored content of a bitstream.
     * 
     * @param bitstream
     *            the bitstream to read; may be null
     * @return a stream over the bitstream's file, or null when
     *         {@code bitstream} is null
     * @throws StorageException
     *             when the file cannot be located or opened
     */
    @Override
    public InputStream retrieve(Bitstream bitstream) throws StorageException {
        this.init();
        GeneralFile file = getFile(bitstream);
        try {
            return (file != null) ? FileFactory.newFileInputStream(file) : null;
        } catch (IOException e) {
            throw new StorageException(e);
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * org.dspace.services.StorageService#delete(org.dspace.orm.entity.Bitstream
     * )
     */
    /**
     * Marks a bitstream as deleted. Every bundle that has it as primary
     * bitstream gets its primary reference cleared and is saved; the stored
     * file itself is not touched by this method.
     * 
     * @param bitstream
     *            the bitstream to flag as deleted
     * @throws StorageException
     *             declared by the interface; not thrown directly here
     */
    @Override
    public void delete(Bitstream bitstream) throws StorageException {
        this.init();

        List<Bundle> primaries = bitstream.getPrimaryBundles();
        for (Bundle bundle : primaries) {
            bundle.setPrimary(null);
            bundleDao.save(bundle);
        }

        bitstream.setDeleted(true);
        bitstreamDao.save(bitstream);
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.dspace.services.StorageService#store(java.io.InputStream)
     */
    /**
     * Stores the content of a stream as a new bitstream in the incoming
     * assetstore. The bitstream is first saved flagged as deleted and only
     * un-flagged once the content has been written, so a failed write leaves
     * a deleted row behind. The input stream is closed before this method
     * returns, whether it succeeds or fails.
     * 
     * @param input
     *            the content to store
     * @return the saved bitstream, with size and (when MD5 is available)
     *         checksum set
     * @throws StorageException
     *             when the target file cannot be resolved, created or written
     */
    @Override
    public Bitstream store(InputStream input) throws StorageException {
        this.init();
        try {
            // Create internal ID
            String id = Utils.generateKey();

            Bitstream bitstream = new Bitstream();
            bitstream.setDeleted(true);
            bitstream.setInternalId(id);
            bitstream.setStoreNumber(incoming);

            bitstreamDao.save(bitstream);

            // Never null here: getFile only returns null for a null bitstream.
            GeneralFile file = this.getFile(bitstream);

            GeneralFile parent = file.getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }

            file.createNewFile();

            // Read through a digest input stream that will work out the MD5
            MessageDigest digest = null;
            try {
                digest = MessageDigest.getInstance(CHECKSUM_ALGORITHM);
            } catch (NoSuchAlgorithmException nsae) // Should never happen
            {
                log.warn("Caught NoSuchAlgorithmException", nsae);
            }

            // Without a digest the content is still stored, just without a checksum.
            InputStream source = (digest != null) ? new DigestInputStream(input, digest) : input;

            GeneralFileOutputStream fos = FileFactory.newFileOutputStream(file);
            boolean copied = false;
            try {
                IOUtils.copy(source, fos);
                copied = true;
            } finally {
                if (copied) {
                    // Let a failing close surface: the data may not have been flushed.
                    fos.close();
                } else {
                    IOUtils.closeQuietly(fos);
                }
            }

            bitstream.setSize(file.length());

            if (digest != null) {
                bitstream.setChecksum(Utils.toHex(digest.digest()));
                bitstream.setChecksumAlgorithm(CHECKSUM_ALGORITHM);
            }

            bitstream.setDeleted(false);
            bitstreamDao.save(bitstream);

            if (log.isDebugEnabled()) {
                log.debug("Stored bitstream " + bitstream.getID() + " in file " + file.getAbsolutePath());
            }

            return bitstream;
        } catch (IOException e) {
            throw new StorageException(e);
        } finally {
            IOUtils.closeQuietly(input);
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.dspace.services.StorageService#register(int, java.lang.String)
     */
    /**
     * Registers a file that already exists in an assetstore as a bitstream,
     * without copying it. The bitstream is first saved flagged as deleted and
     * un-flagged once checksum and size have been set.
     * 
     * @param assetstore
     *            store number of the assetstore that contains the file
     * @param path
     *            path of the file relative to the assetstore
     * @return the saved bitstream
     * @throws StorageException
     *             when the store number is not configured, the file cannot be
     *             opened or read (local), is not known to SRB (remote), or
     *             the checksum algorithm is unavailable
     */
    @Override
    public Bitstream register(int assetstore, String path) throws StorageException {
        this.init();

        // mark this bitstream as a registered bitstream
        String sInternalId = Bitstream.REGISTERED_FLAG + path;

        // Create a deleted bitstream row
        Bitstream bitstream = new Bitstream();
        bitstream.setDeleted(true);
        bitstream.setInternalId(sInternalId);
        bitstream.setStoreNumber(assetstore);

        bitstreamDao.save(bitstream);

        // get a reference to the file
        GeneralFile file = getFile(bitstream);

        // read through a DigestInputStream that will work out the MD5
        //
        // DSpace refers to checksum, writes it in METS, and uses it as an
        // AIP filename (!), but never seems to validate with it. Furthermore,
        // DSpace appears to hardcode the algorithm to MD5 in some places--see
        // METSExport.java.
        //
        // To remain compatible with DSpace we calculate an MD5 checksum on
        // LOCAL registered files. But for REMOTE (e.g. SRB) files we
        // calculate an MD5 on just the fileNAME. The reasoning is that in the
        // case of a remote file, calculating an MD5 on the file itself will
        // generate network traffic to read the file's bytes. In this case it
        // would be better have a proxy process calculate MD5 and store it as
        // an SRB metadata attribute so it can be retrieved simply from SRB.
        //
        // TODO set this up as a proxy server process so no net activity

        // FIXME this is a first class HACK! for the reasons described above
        if (file instanceof LocalFile) {
            // get MD5 on the file for local file
            bitstream.setChecksum(Utils.toHex(digestContent(file)));
        } else if (file instanceof SRBFile) {
            if (!file.exists()) {
                log.error("File: " + file.getAbsolutePath() + " is not in SRB MCAT");
                throw new StorageException("File is not in SRB MCAT");
            }

            // get MD5 on just the filename (!) for SRB file
            int iLastSlash = path.lastIndexOf('/');
            String sFilename = path.substring(iLastSlash + 1);
            MessageDigest md = newChecksumDigest();
            // NOTE(review): getBytes() uses the platform default charset, so
            // this value can differ between hosts for non-ASCII names. Left
            // unchanged to stay compatible with checksums already recorded.
            bitstream.setChecksum(Utils.toHex(md.digest(sFilename.getBytes())));
        } else {
            throw new StorageException("Unrecognized file type - " + "not local, not SRB");
        }

        bitstream.setChecksumAlgorithm(CHECKSUM_ALGORITHM);
        bitstream.setSize(file.length());
        bitstream.setDeleted(false);

        bitstreamDao.save(bitstream);

        if (log.isDebugEnabled()) {
            log.debug("Stored bitstream " + bitstream.getID() + " in file " + file.getAbsolutePath());
        }
        return bitstream;
    }
}