de.nrw.hbz.regal.sync.ingest.DippDownloader.java Source code

Java tutorial

Introduction

Here is the source code for de.nrw.hbz.regal.sync.ingest.DippDownloader.java

Source

/*
 * Copyright 2012 hbz NRW (http://www.hbz-nrw.de/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package de.nrw.hbz.regal.sync.ingest;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import archive.fedora.XmlUtils;

/**
 * http://193.30.112.23:9280/fedora/get/dipp:1001?xml=true
 * http://193.30.112.23:9280/fedora/listDatastreams/dipp:1001?xml=true
 * http://193.30.112.23:9280/fedora/get/dipp:1001/DiPPExt
 * 
 * @author Jan Schnasse, schnasse@hbz-nrw.de
 * 
 */
public class DippDownloader extends Downloader {

    protected void downloadObject(File dir, String pid) {
        try {
            logger.debug(pid + " start download!");
            URL url = new URL(getServer() + "get/" + pid + "?xml=true");
            File file = new File(dir.getAbsolutePath() + File.separator + URLEncoder.encode(pid, "utf-8") + ".xml");
            String data = null;
            StringWriter writer = new StringWriter();
            IOUtils.copy(url.openStream(), writer);
            data = writer.toString();
            FileUtils.writeStringToFile(file, data, "utf-8");

            downloadStreams(dir, pid);
            downloadConstituent(dir, pid);
            downloadRelatedObject(dir, pid, "rel:hasPart");

            downloadRelatedObject(dir, pid, "rel:isPartOf");
            downloadRelatedObject(new File(getDownloadLocation()), pid, "rel:isMemberOf");
            downloadRelatedObject(new File(getDownloadLocation()), pid, "rel:isSubsetOf");
            downloadRelatedObject(new File(getDownloadLocation()), pid, "rel:isMemberOfCollection");
        } catch (MalformedURLException e) {
            logger.error(e.getMessage());
        } catch (IOException e) {
            logger.error(e.getMessage());
        }

    }

    private void downloadConstituent(File dir, String pid) {
        String relation = "rel:hasConstituent";

        try {
            URL url = new URL(getServer() + "get/" + pid + "/RELS-EXT");
            String data = null;
            StringWriter writer = new StringWriter();
            IOUtils.copy(url.openStream(), writer);
            data = writer.toString();

            Element root = XmlUtils.getDocument(data);
            NodeList constituents = root.getElementsByTagName(relation);
            if (constituents == null || constituents.getLength() == 0)
                return;
            File zipDir = new File(dir.getAbsolutePath() + File.separator + "content");
            for (int i = 0; i < constituents.getLength(); i++) {

                Element c = (Element) constituents.item(i);
                String cPid = c.getAttribute("rdf:resource").replace("info:fedora/", "");
                if (cPid.contains("temp")) {
                    logger.debug(cPid + " skip temporary object.");

                } else {
                    File cDir = new File(dir.getAbsolutePath() + File.separator

                            + URLEncoder.encode(cPid, "utf-8"));

                    try {
                        downloadObject(cDir, cPid);
                    } catch (Exception e) {
                        logger.warn(e.getMessage());
                    }
                    try {
                        getMap().remove(cPid);
                        downloadObject(zipDir, cPid);
                    } catch (Exception e) {
                        logger.warn(e.getMessage());
                    }
                }

            }
            File cFile = new File(dir.getAbsolutePath() + File.separator + "content.zip");
            logger.debug("I will zip now! " + zipDir.getAbsolutePath() + " to " + cFile.getAbsolutePath());
            zip(zipDir, cFile);

        } catch (Exception e) {
            logger.error(e.getMessage());
        }

    }

    private void downloadRelatedObject(File dir, String pid, String relation) {
        try {
            URL url = new URL(getServer() + "get/" + pid + "/RELS-EXT");
            String data = null;
            StringWriter writer = new StringWriter();
            IOUtils.copy(url.openStream(), writer);
            data = writer.toString();

            Element root = XmlUtils.getDocument(data);
            NodeList constituents = root.getElementsByTagName(relation);
            for (int i = 0; i < constituents.getLength(); i++) {
                try {
                    Element c = (Element) constituents.item(i);
                    String cPid = c.getAttribute("rdf:resource").replace("info:fedora/", "");

                    logger.debug(pid + " " + relation + " " + cPid);
                    // if (!cPid.contains("oai") && !cPid.contains("temp")
                    // && !pid.contains("oai") && !pid.contains("temp"))
                    logger.info("DOWNLOAD-GRAPH: \"" + pid + "\"->\"" + cPid + "\" [label=\"" + relation + "\"]");

                    if (cPid.contains("temp")) {
                        logger.debug(cPid + " skip temporary object.");

                    } else {
                        File cDir = new File(dir.getAbsolutePath() + File.separator

                                + URLEncoder.encode(cPid, "utf-8"));

                        downloadObject(cDir, cPid);
                    }
                } catch (Exception e) {
                    logger.debug(e.getMessage());
                }

            }
        } catch (Exception e) {
            logger.error(e.getMessage());
        }
    }

    private void downloadStreams(File dir, String pid) {
        try {
            URL url = new URL(getServer() + "listDatastreams/" + pid + "?xml=true");
            String data = null;
            StringWriter writer = new StringWriter();
            IOUtils.copy(url.openStream(), writer);
            data = writer.toString();

            Element root = XmlUtils.getDocument(data);
            NodeList dss = root.getElementsByTagName("datastream");

            for (int i = 0; i < dss.getLength(); i++) {
                Element dsel = (Element) dss.item(i);
                String datastreamName = dsel.getAttribute("dsid");
                String fileName = dsel.getAttribute("label");
                String mimeType = dsel.getAttribute("mimeType");

                if (mimeType.contains("xml")) {
                    fileName = datastreamName + ".xml";
                }
                if (mimeType.contains("html")) {
                    fileName = fileName + ".html";
                }

                File dataStreamFile = new File(dir.getAbsolutePath() + File.separator + "" + fileName);
                download(dataStreamFile, getServer() + "get/" + pid + "/" + datastreamName);
            }

        } catch (MalformedURLException e) {
            logger.error(e.getMessage());
        } catch (IOException e) {
            logger.error(e.getMessage());
        }
    }

    /**
     * @param argv
     *            the argument vector must contain exactly one item which points
     *            to a valid property file
     */
    public static void main(String[] argv) {
        if (argv.length != 1) {
            System.out.println("\nWrong Number of Arguments!");
            System.out.println("Please specify a config.properties file!");
            System.out.println("Example: java -jar dtldownloader.jar dtldownloader.properties\n");
            System.out.println(
                    "Example Properties File:\n\tpidreporter.server=http://urania.hbz-nrw.de:1801/edowebOAI/\n\tpidreporter.set=null\n\tpidreporter.harvestFromScratch=true\n\tpidreporter.pidFile=pids.txt\n\tpiddownloader.server=http://klio.hbz-nrw.de:1801\n\tpiddownloader.downloadLocation=/tmp/zbmed");
            System.exit(1);
        }

        Downloader downloader = new DippDownloader();
        downloader.run(argv[0]);

    }

}