org.dspace.resourcesync.ResourceSyncGenerator.java Source code

Java tutorial

Introduction

Here is the source code for org.dspace.resourcesync.ResourceSyncGenerator.java

Source

/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.resourcesync;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.DefaultParser;
import org.apache.log4j.Logger;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.CommunityService;
import org.dspace.content.service.SiteService;
import org.dspace.utils.DSpace;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.handle.factory.HandleServiceFactory;
import org.openarchives.resourcesync.ResourceSync;
import org.openarchives.resourcesync.ResourceSyncDescription;
import org.openarchives.resourcesync.ResourceSyncDescriptionIndex;

/**
 * @author Richard Jones
 * @author Andrea Bollini (andrea.bollini at 4science.it)
 * @author Andrea Petrucci (andrea.petrucci at 4science.it)
 */
public class ResourceSyncGenerator {
    private static Logger log = Logger.getLogger(ResourceSyncGenerator.class);

    public static void main(String[] args) throws Exception {
        Options options = new Options();
        options.addOption("i", "init", false,
                "Create a fresh ResourceSync description of this repository - this will remove any previous ResourceSync documents");
        options.addOption("u", "update", false,
                "Update the Change List ResourceSync document with the changes since this script last ran");
        options.addOption("r", "rebase", false,
                "Update the Resource List ResourceSync document to reflect the current state of the archive, and bring the Change List up to the same level");
        CommandLineParser parser = new DefaultParser();
        CommandLine cmd = parser.parse(options, args);

        Context context = new Context();
        List<String> handles = buildHandleForResourceSync(context);

        SiteService siteService = ContentServiceFactory.getInstance().getSiteService();
        String siteHandle = siteService.findSite(context).getHandle();
        ResourceSyncGenerator rsg = new ResourceSyncGenerator(context, siteHandle, handles, null);

        try {
            if (cmd.hasOption("i")) {
                rsg.init(context);
            } else if (cmd.hasOption("u")) {
                rsg.update(context);
            } else if (cmd.hasOption("r")) {
                rsg.rebase(context);
            } else {
                HelpFormatter hf = new HelpFormatter();
                hf.printHelp("ResourceSyncGenerator", "Manage ResourceSync documents for DSpace", options, "");
            }
        } finally {
            context.abort();
        }
    }

    private Map<String, UrlManager> ums;
    public static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
    public static final SimpleDateFormat sdfChangeList = new SimpleDateFormat("yyyy-MM-dd-HHmmss");
    private Context context;
    private boolean resourceDump = false;
    private boolean changeDump = false;
    private String outdir;
    private List<String> handles = null;
    private Date fromChangeDump = null;
    private String siteHandle;

    public static List<String> buildHandleForResourceSync(Context context) throws SQLException {
        SiteService siteService = ContentServiceFactory.getInstance().getSiteService();
        CommunityService communityService = ContentServiceFactory.getInstance().getCommunityService();
        CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();

        String capabilityList = new DSpace().getConfigurationService().getProperty("resourcesync.capabilitylists");
        String handleSite = siteService.findSite(context).getHandle();

        List<String> handles = new ArrayList<String>();
        if (capabilityList.equals("top")) {
            List<Community> communities = communityService.findAllTop(context);
            for (Community c : communities) {
                handles.add(c.getHandle());
            }
            handles.add(handleSite);
        } else if (capabilityList.equals("all")) {
            handles.add(handleSite);
            List<Community> communities = communityService.findAll(context);
            for (Community c : communities) {
                handles.add(c.getHandle());
            }
            List<Collection> collections = collectionService.findAll(context);
            for (Collection c : collections) {
                handles.add(c.getHandle());
            }
        } else if (capabilityList.equals("site")) {
            handles.add(handleSite);
        } else {

            String[] handle = capabilityList.split("\\s+");

            for (String h : handle) {
                DSpaceObject dso = HandleServiceFactory.getInstance().getHandleService().resolveToObject(context,
                        h);
                if (dso == null) {
                    log.error("The handle isn't valid " + h);
                } else if (dso.getType() == Constants.ITEM) {
                    log.error("Can't use ResourceSync with handle of item " + h);
                } else {
                    handles.add(h);
                }
            }
        }
        return handles;
    }

    public ResourceSyncGenerator(Context context, String siteHandle, List<String> handles, Date fromChangeDump)
            throws IOException {
        this.siteHandle = siteHandle;
        this.handles = handles;
        this.ums = new HashMap<String, UrlManager>();
        for (String h : handles) {
            this.ums.put(h, new UrlManager(siteHandle, h));
        }
        this.context = context;
        this.resourceDump = new DSpace().getConfigurationService()
                .getBooleanProperty("resourcesync.resourcedump.enable");

        this.outdir = new DSpace().getConfigurationService().getProperty("resourcesync.dir");
        if (this.outdir == null) {
            throw new IOException("No configuration for resourcesync.dir");
        }
        this.fromChangeDump = fromChangeDump;
    }

    //////////////////////////////////////////////////////////////////////////////
    // methods to be used to interact with the generator
    //////////////////////////////////////////////////////////////////////////////

    public void init(Context context) throws IOException, SQLException, ParseException {
        this.changeDump = false;
        // make sure that the directory exists, and that it is empty
        this.ensureResourceSyncDirectory();
        this.emptyResourceSyncDirectory();

        // generate the description index document
        this.generateResourceSyncDescriptionIndex(handles);
        for (String handle : this.handles) {

            // generate the resource list
            this.generateResourceList(handle);

            // generate the description document
            this.generateResourceSyncDescription(handle);

            // should we generate a resource dump?
            if (this.resourceDump) {
                this.generateResourceDump(handle);
            }
            // generate the capability list (with a resource list, without a change list, and maybe with a resource dump)
            this.generateCapabilityList(true, false, this.resourceDump, false, this.changeDump, handle);

            // generate the blank changelist as a placeholder for the next iteration
            this.generateBlankChangeList(handle);
        }
    }

    public void update(Context context) throws IOException, SQLException, ParseException {

        this.changeDump = true;

        // check the directory is there
        this.ensureResourceSyncDirectory();

        // generate the latest changelist
        //      String clFilename = this.generateLatestChangeList();

        // add to the change list archive
        String clFilename = null;
        HashMap<String, String> clFilenameList = new HashMap<String, String>();
        for (String handle : this.handles) {

            List<ResourceSyncEvent> rseListFiltered = new ArrayList<ResourceSyncEvent>();
            rseListFiltered = getChange(handle);
            // generate the latest changelist
            clFilename = this.generateLatestChangeList(handle, rseListFiltered);
            clFilenameList.put(handle, clFilename);

            if (this.resourceDump) {
                this.generateChangeDump(handle, rseListFiltered);
                //this.generateResourceDump(handle);
            }

            // update the last modified date in the capability list (and add the
            // changelistarchive if necessary)
            this.updateCapabilityList(handle);
        }
        this.addChangeListToArchive(clFilenameList, handles);
    }

    public void rebase(Context context) throws IOException, SQLException, ParseException {
        this.changeDump = false;
        // make sure that the directory exists, and that it is empty
        this.ensureResourceSyncDirectory();
        this.emptyResourceSyncDirectory();

        // generate the description index document
        this.generateResourceSyncDescriptionIndex(handles);

        String clFilename = null;
        HashMap<String, String> clFilenameList = new HashMap<String, String>();

        for (String handle : this.handles) {

            // generate the resource list
            this.generateResourceList(handle);

            // generate the description document
            this.generateResourceSyncDescription(handle);

            List<ResourceSyncEvent> rseListFiltered = new ArrayList<ResourceSyncEvent>();
            rseListFiltered = getChange(handle);
            // generate the latest changelist
            clFilename = this.generateLatestChangeList(handle, rseListFiltered);
            clFilenameList.put(handle, clFilename);

            // should we generate a resource dump?
            if (this.resourceDump) {
                this.generateResourceDump(handle);
            }
            // generate the capability list (with a resource list, without a change list, and maybe with a resource dump)
            this.updateCapabilityList(handle);
        }
        this.addChangeListToArchive(clFilenameList, handles);
    }

    public List<ResourceSyncEvent> getChange(String handle) {
        Date from = null;
        try {
            if (fromChangeDump == null) {
                from = this.getLastChangeListDate(handle);
            } else {
                from = fromChangeDump;
            }
        } catch (ParseException e) {
            log.error(e.getMessage(), e);
        }
        Date to = new Date();
        List<ResourceSyncEvent> rseList = new ArrayList<ResourceSyncEvent>();
        ResourceSyncAuditService auditService = new ResourceSyncAuditService();
        rseList = auditService.listEvents(from, to, handle);
        return rseList;
    }

    //////////////////////////////////////////////////////////////////////
    // file management utility methods
    //////////////////////////////////////////////////////////////////////

    private void ensureResourceSyncDirectory() throws IOException {
        // make sure our output directory exists
        this.ensureDirectory(this.outdir);
    }

    private void ensureDirectory(String dir) throws IOException {
        File od = new File(dir);
        if (!od.exists()) {
            od.mkdir();
        }
        if (!od.isDirectory()) {
            throw new IOException(dir + " exists, but is not a directory");
        }
    }

    private void emptyResourceSyncDirectory() throws IOException {
        this.emptyDirectory(this.outdir);
    }

    private void deleteFolder(File folder) {
        File[] files = folder.listFiles();
        if (files != null) {
            for (File f : files) {
                if (f.isDirectory()) {
                    deleteFolder(f);
                } else {
                    f.delete();
                }
            }
        }
        folder.delete();
    }

    private void emptyDirectory(String outdir) {
        File out = new File(outdir);
        File[] files = out.listFiles();
        if (files == null) {
            return;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                deleteFolder(f);
            } else {
                f.delete();
            }
        }
    }

    private FileOutputStream getFileOutputStream(String filename) throws IOException {
        FileOutputStream fos = new FileOutputStream(new File(this.outdir + File.separator + filename));
        return fos;
    }

    private FileOutputStream getFileOutputStream(String filename, String handle) throws IOException {
        String rsdFile = getOutdir(handle) + File.separator + filename;
        FileOutputStream fos = new FileOutputStream(new File(rsdFile));
        return fos;
    }

    private String getOutdir(String handle) {
        if (handle != null && !(siteHandle.equals(handle))) {
            return this.outdir + File.separator + handle.replaceAll("/", "-");
        }
        return this.outdir;
    }

    private void deleteFile(String filename) {
        File old = new File(this.outdir + File.separator + filename);
        if (old.exists()) {
            old.delete();
        }
    }

    private String getLastChangeDumpName(String handle) throws ParseException {
        String filename = null;
        Date from = new Date(0);
        File dir;
        if (handle != siteHandle) {
            dir = new File(this.outdir + File.separator + handle.replace("/", "-"));
        } else {
            dir = new File(this.outdir);
        }
        File[] files = dir.listFiles();
        if (files != null) {
            for (File f : dir.listFiles()) {
                if (FileNames.isChangeDump(f)) {
                    String dr = FileNames.changeDumpDate(f);
                    Date possibleFrom = ResourceSyncGenerator.sdfChangeList.parse(dr);
                    if (possibleFrom.getTime() > from.getTime()) {
                        from = possibleFrom;
                        filename = f.getName();
                    }
                }
            }
        }
        return filename;
    }

    private String getLastChangeListName(String handle) throws ParseException {
        String filename = null;
        Date from = new Date(0);
        File dir;
        if (handle != siteHandle) {
            dir = new File(this.outdir + File.separator + handle.replace("/", "-"));
        } else {
            dir = new File(this.outdir);
        }
        File[] files = dir.listFiles();
        if (files != null) {
            for (File f : dir.listFiles()) {
                if (FileNames.isChangeList(f)) {
                    String dr = FileNames.changeListDate(f);
                    Date possibleFrom = ResourceSyncGenerator.sdfChangeList.parse(dr);
                    if (possibleFrom.getTime() > from.getTime()) {
                        from = possibleFrom;
                        filename = f.getName();
                    }
                }
            }
        }
        return filename;
    }

    private Date getLastChangeListDate(String handle) throws ParseException {
        File dir = new File(getOutdir(handle));
        Date from = new Date(0);
        File[] files = dir.listFiles();
        if (files != null) {
            for (File f : dir.listFiles()) {
                if (FileNames.isChangeList(f)) {
                    String dr = FileNames.changeListDate(f);
                    Date possibleFrom = ResourceSyncGenerator.sdfChangeList.parse(dr);
                    if (possibleFrom.getTime() > from.getTime()) {
                        from = possibleFrom;
                    }
                }
            }
        }
        return from;
    }

    private boolean fileExists(String filename) {
        String path = this.outdir + File.separator + filename;
        File file = new File(path);
        return file.exists() && file.isFile();
    }

    ////////////////////////////////////////////////////////////////////
    // private document generation methods
    ////////////////////////////////////////////////////////////////////

    private void generateResourceSyncDescriptionIndex(List<String> handles) throws IOException {
        FileOutputStream fos = this.getFileOutputStream(FileNames.resourceSyncDocumentIndex);

        // no need for a DSpace-specific implementation, it is so simple
        ResourceSyncDescriptionIndex desc = new ResourceSyncDescriptionIndex(ResourceSync.CAPABILITY_RESOURCESYNC);
        for (String h : handles) {
            desc.addSourceDescription((this.ums.get(h).resourceSyncDescription()));
        }
        desc.serialise(fos);

        fos.close();

    }

    private void generateResourceSyncDescription(String handle) throws IOException {
        FileOutputStream fos = this.getFileOutputStream(FileNames.resourceSyncDocument, handle);

        // no need for a DSpace-specific implementation, it is so simple
        ResourceSyncDescription desc = new ResourceSyncDescription();
        desc.addCapabilityList(this.ums.get(handle).capabilityList());
        desc.serialise(fos);

        fos.close();
    }

    private void updateCapabilityList(String handle) throws IOException, ParseException {
        // just regenerate the capability list in its entirity
        this.generateCapabilityList(true, false, this.resourceDump, true, this.changeDump, handle);
    }

    private void generateCapabilityList(boolean resourceList, boolean changeListArchive, boolean resourceDump,
            boolean changeList, boolean changeDump, String handle) throws IOException, ParseException {
        // get the latest change list if there is one and we want one
        String changeListUrl = null;
        String changeDumpUrl = null;
        if (changeList) {
            String clFilename = this.getLastChangeListName(handle);
            changeListUrl = this.ums.get(handle).changeList(clFilename);

        }
        if (changeDump) {
            String cdFilename = this.getLastChangeDumpName(handle);
            changeDumpUrl = this.ums.get(handle).changeDump(cdFilename);
        }
        FileOutputStream fos = this.getFileOutputStream(FileNames.capabilityList, handle);

        DSpaceCapabilityList dcl = new DSpaceCapabilityList(this.context, resourceList, changeListArchive,
                resourceDump, changeList, changeDump, changeListUrl, changeDumpUrl, ums.get(handle));
        dcl.serialise(fos);

        fos.close();
    }

    private void generateResourceList(String handle) throws SQLException, IOException {
        String directoryName;
        String path;
        if (!handle.equals(siteHandle)) {
            directoryName = handle.replace("/", "-");
            path = new DSpace().getConfigurationService().getProperty("resourcesync.resourcesync.dir");
            path = path.concat("/" + directoryName);
            File directory = new File(path);
            directory.mkdir();
        }
        FileOutputStream fos = this.getFileOutputStream(File.separator + FileNames.resourceList, handle);

        DSpaceResourceList drl = new DSpaceResourceList(this.context);
        drl.serialise(fos, handle, this.ums.get(handle));

        fos.close();
    }

    private void generateResourceDump(String handle) throws IOException, SQLException {
        if (handle.equals(siteHandle)) {
            this.deleteFile(FileNames.resourceDump);
            this.deleteFile(FileNames.resourceDumpZip);
        }
        DSpaceResourceDump drd = new DSpaceResourceDump(this.context);
        drd.serialise(getOutdir(handle), handle, ums.get(handle));
    }

    private void generateChangeDump(String handle, List<ResourceSyncEvent> rseList)
            throws IOException, SQLException {
        DSpaceChangeDump drd = new DSpaceChangeDump(this.context);
        drd.serialiseChangeDump(getOutdir(handle), ums.get(handle), rseList);
    }

    public void generateChangeDump(String handle, List<ResourceSyncEvent> rseList, OutputStream os)
            throws IOException, SQLException {
        DSpaceChangeDump drd = new DSpaceChangeDump(this.context);
        drd.serialiseChangeDump(getOutdir(handle), ums.get(handle), rseList, os);
    }

    private String generateLatestChangeList(String handle, List<ResourceSyncEvent> rseListFiltered)
            throws ParseException, IOException, SQLException {

        Date from = this.getLastChangeListDate(handle);
        Date to = new Date();
        String tr = sdfChangeList.format(to);
        String filename = FileNames.changeList(tr);
        FileOutputStream fos = this.getFileOutputStream(filename, handle);
        DSpaceChangeList dcl = new DSpaceChangeList(this.context, from, to, ums.get(handle)); //TO-DO
        dcl.serialiseForDump(fos, ums.get(handle), rseListFiltered);

        return filename;
    }

    private void generateBlankChangeList(String handle) throws IOException, SQLException, ParseException {

        Date to = new Date();
        String tr = ResourceSyncGenerator.sdfChangeList.format(to);

        FileOutputStream fos = this.getFileOutputStream(FileNames.changeList(tr), handle);

        // generate the changelist for the period (which is of 0 length)
        DSpaceChangeList dcl = new DSpaceChangeList(this.context, to, to, ums.get(handle));
        dcl.serialise(fos);
        fos.close();
    }

    private void addChangeListToArchive(HashMap<String, String> filename, List<String> handles)
            throws IOException, ParseException {

        DSpaceChangeListArchive dcla = new DSpaceChangeListArchive(this.context);
        dcla.setUm(ums.get(siteHandle));
        FileOutputStream fos = this.getFileOutputStream(FileNames.changeListArchive, siteHandle);

        for (String handle : handles) {
            // get the URL of the new changelist
            String loc = this.ums.get(handle).changeList(filename.get(handle));

            // get the date of the new changelist (it is encoded in the filename)
            String dr = FileNames.changeListDate(filename.get(handle));
            Date date = ResourceSyncGenerator.sdfChangeList.parse(dr);

            dcla.addChangeList(loc, date);
        }
        dcla.serialise(fos);

        fos.close();
    }
}