de.nrw.hbz.regal.sync.Syncer.java Source code

Java tutorial

Introduction

Here is the source code for de.nrw.hbz.regal.sync.Syncer.java

Source

/*
 * Copyright 2012 hbz NRW (http://www.hbz-nrw.de/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package de.nrw.hbz.regal.sync;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.Vector;

import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.nrw.hbz.regal.DigitoolPidStrategy;
import de.nrw.hbz.regal.PIDReporter;
import de.nrw.hbz.regal.sync.extern.DigitalEntity;
import de.nrw.hbz.regal.sync.extern.DigitalEntityBuilderInterface;
import de.nrw.hbz.regal.sync.ingest.DownloaderInterface;
import de.nrw.hbz.regal.sync.ingest.IngestInterface;
import de.nrw.hbz.regal.sync.ingest.KeystoreConf;

/**
 * @author Jan Schnasse, schnasse@hbz-nrw.de
 */
public class Syncer {

    @SuppressWarnings({ "serial", "javadoc" })
    public class ReadFileException extends RuntimeException {
        public ReadFileException(Throwable cause) {
            super(cause);
        }
    }

    @SuppressWarnings({ "serial", "javadoc" })
    public class CloseReaderException extends RuntimeException {
        public CloseReaderException(Throwable cause) {
            super(cause);
        }
    }

    @SuppressWarnings({ "serial", "javadoc" })
    public class IngestItemException extends RuntimeException {
        public IngestItemException(Throwable cause) {
            super(cause);
        }
    }

    final static Logger logger = LoggerFactory.getLogger(Syncer.class);

    private PIDReporter harvester;
    private IngestInterface ingester;
    private DownloaderInterface downloader;
    private DigitalEntityBuilderInterface builder;
    private String mode;
    private String user;
    private String password;
    private String dtl;
    private String cache;
    private String oai;
    private String set;
    private String timestamp;
    // private String fedoraBase;
    private String host;
    private String pidListFile;
    private Options options;
    private String namespace;
    KeystoreConf kconf = null;

    /**
     * @param ingester
     *            the ingester is used to pass data to the archive
     * @param downloader
     *            the downloader is used to download data from a foreign system
     * @param builder
     *            the builder assembles DigitalEntities from the downloaded data
     *            and passes it to the ingester.
     */
    public Syncer(IngestInterface ingester, DownloaderInterface downloader, DigitalEntityBuilderInterface builder) {
        this.ingester = ingester;
        this.downloader = downloader;
        this.builder = builder;
        options = new Options();

        options.addOption("?", "help", false, "Print usage information");
        options.addOption("m", "mode", true, "Specify mode: \n "
                + "INIT: All PIDs will be downloaded. All pids will be updated or created.\n"
                + "SYNC: Modified or new PIDs will be downloaded and updated or created\n "
                + "CONT: All PIDs that aren't already downloaded will be downloaded and created or updated\n"
                + "UPDT: In accordance to the timestamp all modified PIDs will be reingested"
                + "PIDL: Use this mode in combination with -list to provide a file with a newline separated pidlist"
                + "DELE: Use this mode in combination with -list to provide a file with a newline separated pidlist");
        options.addOption("u", "user", true, "Specify username");
        options.addOption("p", "password", true, "Specify password");
        options.addOption("dtl", "dtl", true, "Specify digitool url");
        options.addOption("cache", "cache", true, "Specify local directory");
        options.addOption("oai", "oai", true, "Specify the OAI-PMH endpoint");
        Option setOption = new Option("set", "set", true, "Specify an OAI setSpec");
        setOption.setValueSeparator(',');
        setOption.setRequired(false);
        setOption.setOptionalArg(true);
        options.addOption(setOption);
        options.addOption("timestamp", "timestamp", true, "Specify a local file e.g. .oaitimestamp");
        options.addOption("fedoraBase", "fedoraBase", true, "The Fedora Baseurl");
        options.addOption("host", "host", true, "The Fedora Baseurl");
        options.addOption("namespace", "namespace", true, "The namespace to operate on.");
        options.addOption("list", "list", true,
                "Path to a file with a newline separated pidlist. Only needed in combination with mode PIDL and DELE.");
        options.addOption("keystoreLocation", "keystoreLocation", true, "Specify a keystore for ssl");
        options.addOption("keystorePassword", "keystorePassword", true, "Specify a keystore password for ssl");
    }

    /**
     * @param args
     *            usage: -oai,--oai <arg> Specify the OAI-PMH endpoint
     *            -m,--mode <arg> Specify mode: INIT: All PIDs will be
     *            downloaded. All pids will be updated or created. SYNC:
     *            Modified or new PIDs will be downloaded and updated or created
     *            CONT: All PIDs that aren't already downloaded will be
     *            downloaded and created or updated UPDT: In accordance to the
     *            timestamp all modified PIDs will be reingestedPIDL: Use this
     *            mode in combination with -list to provide a file with a
     *            newline separated pidlistMODL: Recreates only the Content
     *            modelDELE: Use this mode in combination with -list to provide
     *            a file with a newline separated pidlist -?,--help Print usage
     *            information -cache,--cache <arg> Specify local directory
     *            -dtl,--dtl <arg> Specify digitool url
     *            -fedoraBase,--fedoraBase <arg> The Fedora Baseurl
     *            -host,--host <arg> The Fedora Baseurl -list,--list
     *            <arg> Path to a file with a newline separated pidlist.
     *            Only needed in combination with mode PIDL and DELE.
     *            -namespace,--namespace <arg> The namespace to operate
     *            on. -p,--password <arg> Specify password -set,--set
     *            <arg> Specify an OAI setSpec -timestamp,--timestamp
     *            <arg> Specify a local file e.g. .oaitimestamp -u,--user
     *            <arg> Specify username
     */
    public void main(String[] args) {
        init(args);
        run();
    }

    /**
     * @param args
     *            main args
     */
    public void init(String[] args) {

        DigitoolDownloadConfiguration config = new DigitoolDownloadConfiguration(args, options, Syncer.class);

        if (config.hasOption("help") | !config.hasOption("mode") | !config.hasOption("user")
                | !config.hasOption("password") | !config.hasOption("dtl") | !config.hasOption("cache")
                | !config.hasOption("oai") | !config.hasOption("timestamp") | !config.hasOption("fedoraBase")
                | !config.hasOption("host") | !config.hasOption("namespace"))

        {
            showHelp(options);
            return;
        }

        mode = config.getOptionValue("mode");
        user = config.getOptionValue("user");
        password = config.getOptionValue("password");
        dtl = config.getOptionValue("dtl");
        cache = config.getOptionValue("cache");
        oai = config.getOptionValue("oai");
        set = config.getOptionValue("set");
        timestamp = config.getOptionValue("timestamp");
        // fedoraBase = config.getOptionValue("fedoraBase");
        host = config.getOptionValue("host");
        namespace = config.getOptionValue("namespace");
        pidListFile = null;
        if (config.hasOption("list")) {
            pidListFile = config.getOptionValue("list");
        }

        harvester = new de.nrw.hbz.regal.PIDReporter(oai, timestamp);
        downloader.init(dtl, cache);
        KeystoreConf kconf = new KeystoreConf();
        kconf.location = config.getOptionValue("keystoreLocation");
        kconf.password = config.getOptionValue("keystorePassword");
        ingester.init(host, user, password, namespace, kconf);
    }

    private void showHelp(Options opts) {
        HelpFormatter help = new HelpFormatter();
        help.printHelp(" ", opts);
    }

    private void run() {
        if (mode.compareTo("INIT") == 0) {
            init(set);
        } else if (mode.compareTo("SYNC") == 0) {
            sync(set);
        } else if (mode.compareTo("CONT") == 0) {
            cont(set);
        } else if (mode.compareTo("UPDT") == 0) {
            updt(set);
        } else if (mode.compareTo("PIDL") == 0) {

            pidl(pidListFile);

        } else if (mode.compareTo("DELE") == 0) {

            dele(pidListFile);

        } else if (mode.compareTo("TEST") == 0) {
            ingester.test();
        }

    }

    /*
     * "INIT: All PIDs will be downloaded. All pids will be updated or created.\n"
     */
    void init(String sets) {
        boolean harvestFromScratch = true;
        boolean forceDownload = true;

        List<String> pids = harvester.harvest(sets, harvestFromScratch, new DigitoolPidStrategy(), "oai_dc");
        logger.info("Verarbeite " + pids.size() + " Dateneinheiten.");

        int size = pids.size();
        for (int i = 0; i < size; i++) {
            try {
                logger.info((i + 1) + " / " + size);
                String pid = pids.get(i);
                String baseDir = downloader.download(pid, forceDownload);
                logger.info("\tBuild Bean \t" + pid);

                if (!downloader.hasUpdated()) {
                    logger.info("New Files Available: Start Ingest!");
                    DigitalEntity dtlBean = builder.build(baseDir, pids.get(i));

                    ingester.ingest(dtlBean);
                    dtlBean = null;
                } else if (downloader.hasUpdated()) {
                    logger.info("Update Files!");
                    DigitalEntity dtlBean = builder.build(baseDir, pids.get(i));
                    ingester.ingest(dtlBean);
                    dtlBean = null;
                }

            } catch (Exception e) {
                logger.error("", e);

            }
        }

    }

    /*
     * +
     * "SYNC: Modified or new PIDs will be downloaded and updated or created\n "
     */
    void sync(String sets) {
        boolean harvestFromScratch = false;
        boolean forceDownload = true;

        List<String> pids = harvester.harvest(sets, harvestFromScratch, new DigitoolPidStrategy(), "oai_dc");
        logger.info("Verarbeite " + pids.size() + " Dateneinheiten.");

        int size = pids.size();
        for (int i = 0; i < size; i++) {
            try {
                logger.info((i + 1) + " / " + size);
                String pid = pids.get(i);
                String baseDir = downloader.download(pid, forceDownload);
                logger.info("\tBuild Bean \t" + pid);

                if (!downloader.hasUpdated()) {
                    logger.info("New Files Available: Start Ingest!");
                    DigitalEntity dtlBean = builder.build(baseDir, pids.get(i));

                    ingester.ingest(dtlBean);
                    dtlBean = null;
                } else if (downloader.hasUpdated()) {
                    logger.info("Update Files!");
                    DigitalEntity dtlBean = builder.build(baseDir, pids.get(i));
                    ingester.update(dtlBean);
                    dtlBean = null;
                }

            } catch (Exception e) {
                logger.error(e.toString(), e);
            }
        }

    }

    /*
     * +
     * "CONT: All PIDs that aren't already downloaded will be downloaded and created or updated\n"
     */
    void cont(String sets) {
        boolean harvestFromScratch = true;
        boolean forceDownload = false;
        List<String> pids = harvester.harvest(sets, harvestFromScratch, new DigitoolPidStrategy(), "oai_dc");
        logger.info("Verarbeite " + pids.size() + " Dateneinheiten.");
        int size = pids.size();
        for (int i = 0; i < size; i++) {
            try {
                logger.info((i + 1) + " / " + size);
                String pid = pids.get(i);
                String baseDir = downloader.download(pid, forceDownload);
                logger.info("\tBuild Bean \t" + pid);

                if (!downloader.hasUpdated() && downloader.hasDownloaded()) {
                    logger.info("New Files Available: Start Ingest!");
                    DigitalEntity dtlBean = builder.build(baseDir, pids.get(i));

                    ingester.ingest(dtlBean);
                    dtlBean = null;
                }

            } catch (Exception e) {
                logger.error(e.toString(), e);
            }
        }

    }

    /*
     * +
     * "UPDT: in accordance to the timestamp all modified PIDs will be reingested"
     */
    void updt(String sets) {
        boolean harvestFromScratch = false;
        boolean forceDownload = false;

        List<String> pids = harvester.harvest(sets, harvestFromScratch, new DigitoolPidStrategy(), "oai_dc");
        logger.info("Verarbeite " + pids.size() + " Dateneinheiten.");

        int size = pids.size();
        for (int i = 0; i < size; i++) {
            try {
                logger.info((i + 1) + " / " + size);
                String pid = pids.get(i);
                String baseDir = downloader.download(pid, forceDownload);
                logger.info("\tBuild Bean \t" + pid);

                logger.info("New Files Available: Start Ingest!");
                DigitalEntity dtlBean = builder.build(baseDir, pid);
                ingester.delete(pid);
                ingester.ingest(dtlBean);
                dtlBean = null;

            } catch (Exception e) {
                logger.error(e.toString(), e);
            }
        }
    }

    void pidl(String pidFile) {
        Vector<String> pids;

        pids = readPidlist(pidFile);
        int size = pids.size();
        for (int i = 0; i < size; i++) {
            try {
                logger.info((i + 1) + " / " + size);
                String pid = pids.get(i);

                String baseDir = downloader.download(pid, false);

                if (!downloader.hasUpdated()) {

                    DigitalEntity dtlBean = builder.build(baseDir, pids.get(i));

                    ingester.ingest(dtlBean);
                    dtlBean = null;
                    logger.info((i + 1) + "/" + size + " " + pid + " has been processed!\n");
                } else if (downloader.hasUpdated()) {

                    DigitalEntity dtlBean = builder.build(baseDir, pids.get(i));
                    ingester.ingest(dtlBean);
                    dtlBean = null;
                    logger.info((i + 1) + "/" + size + " " + pid + " has been updated!\n");
                }

            } catch (Exception e) {
                logger.error(e.toString(), e);
            }
        }

    }

    void dele(String pidFile) {
        Vector<String> pids;
        pids = readPidlist(pidFile);
        int size = pids.size();
        for (int i = 0; i < size; i++) {
            logger.info((i + 1) + " / " + size);
            String pid = pids.get(i);
            ingester.delete(pid);
            logger.info((i + 1) + "/" + size + " " + pid + " deleted!\n");
        }
    }

    private Vector<String> readPidlist(String pidFile) {
        File file = new File(pidFile);
        Vector<String> result = new Vector<String>();
        BufferedReader reader = null;
        String str = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            while ((str = reader.readLine()) != null) {
                result.add(str);
            }
        } catch (IOException e) {
            throw new ReadFileException(e);
        } finally {
            if (reader != null)
                try {
                    reader.close();
                } catch (IOException e) {
                    throw new CloseReaderException(e);
                }

        }
        return result;
    }

}