de.uzk.hki.da.core.IngestAreaScannerWorker.java Source code

Java tutorial

Introduction

Here is the source code for de.uzk.hki.da.core.IngestAreaScannerWorker.java

Source

/*
  DA-NRW Software Suite | ContentBroker
  Copyright (C) 2013 Historisch-Kulturwissenschaftliche Informationsverarbeitung
  Universität zu Köln
    
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
    
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
    
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package de.uzk.hki.da.core;

import java.io.File;
import java.io.FilenameFilter;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.FilenameUtils;
import org.hibernate.Session;
import org.slf4j.MDC;

import de.uzk.hki.da.model.Job;
import de.uzk.hki.da.model.Node;
import de.uzk.hki.da.model.Object;
import de.uzk.hki.da.model.User;
import de.uzk.hki.da.service.HibernateUtil;
import de.uzk.hki.da.utils.C;
import de.uzk.hki.da.utils.Path;

/**
 * Scans the staging area for new packages. If there are new files which have
 * stayed there longer than specified by minAge, they get moved to a destination area
 * and a job gets created for them.
 * 
 * <pre><code>
 *   Staging area may look like
 *   .../stagingRoot/TEST1
 *   .../stagingRoot/TEST2
 *   
 *   and destination area like
 *   .../destinationRoot/TEST1
 *   .../destinationRoot/TEST2
 * </code></pre>
 * A file located under .../stagingRoot/TEST1/a.txt would then be moved to .../destinationRoot/TEST1/a.txt
 * 
 * @author Daniel M. de Oliveira
 *
 */
public class IngestAreaScannerWorker extends Worker {

    /**
     * To rule everything out except containers of valid formats.
     */
    private class AcceptedContainerFormatsFilter implements FilenameFilter {

        /* (non-Javadoc)
         * @see java.io.FilenameFilter#accept(java.io.File, java.lang.String)
         */
        @Override
        public boolean accept(File dir, String name) {
            return (name.endsWith(".zip") || name.endsWith(".tar.gz") || name.endsWith(".tgz")
                    || name.endsWith(".tar"));
        }
    }

    private class BagitScanner implements FilenameFilter {

        /* (non-Javadoc)
         * @see java.io.FilenameFilter#accept(java.io.File, java.lang.String)
         */
        @Override
        public boolean accept(File dir, String name) {
            return (name.indexOf("bagit") >= 0 && name.endsWith(".txt"));
        }
    }

    /** The min age. */
    private int minAge; // required minimum age in milliseconds

    private Path ingestAreaRootPath;

    /** The local node name. */
    private String localNodeId;

    /** The register object service. */
    private RegisterObjectService registerObjectService;

    /** The files. */
    private Map<String, Map<String, Long>> sipsForIngestForCSN = new HashMap<String, Map<String, Long>>(); //CSN-String -> FileName-String -> Timestamp-Long

    /** The contractor short names. */
    private List<User> contractors = new ArrayList<User>();

    /**
     * @return The contractors whose ingest folder will get scanned for files. 
     */
    public Set<User> init() {
        if ((ingestAreaRootPath == null) || (ingestAreaRootPath.toString().isEmpty()))
            throw new IllegalStateException("ingestAreaRootPath must not set");

        if (!ingestAreaRootPath.toFile().exists())
            throw new RuntimeException("No file or directory: " + ingestAreaRootPath);

        logger.info("Scanning staging area for contractor folders");
        String children[] = ingestAreaRootPath.toFile().list();

        for (int i = 0; i < children.length; i++) {
            if (!Path.makeFile(ingestAreaRootPath, children[i]).isDirectory())
                continue;

            Session session = HibernateUtil.openSession();
            session.beginTransaction();
            User contractor = getContractor(session, children[i]);
            session.close();
            if (contractor == null) {
                logger.warn("Cannot find in ObjectDB: " + children[i] + " -  will not scan sips for contractor");
                continue;
            } else
                logger.info("Will scan sips for contractor: " + children[i]);

            contractors.add(contractor);
            if (!sipsForIngestForCSN.keySet().contains(contractor.getShort_name()))
                sipsForIngestForCSN.put(contractor.getShort_name(), new HashMap<String, Long>());
        }
        return new HashSet<User>(contractors);
    }

    @Override
    public void setMDC() {
        MDC.put(WORKER_ID, "ingest");
    }

    /**
     * Checking for new files in the staging area.   
     */
    @Override
    public void scheduleTaskImplementation() {

        try {

            long currentTimeStamp = System.currentTimeMillis();

            for (User contractor : contractors) {

                for (String child : scanContractorFolderForReadySips(contractor.getShort_name(),
                        currentTimeStamp)) {

                    logger.info("Found file \"" + child + "\" in ingest Area. Creating job for \""
                            + contractor.getShort_name() + "\"");

                    Object object = null;
                    try {
                        object = registerObjectService.registerObject(child, contractor);
                    } catch (UserException e) {
                        logger.error("cannot register object " + child + " for contractor " + contractor
                                + ". Skip creating job for object.", e);
                        continue;
                    }

                    Job job = insertJobIntoQueueAndSetWorkFlowState(contractor,
                            convertMaskedSlashes(FilenameUtils.removeExtension(child)), localNodeId, object);
                    logger.debug("Created new Object " + object + ":::: Created job: " + job);
                }
            }
        } catch (Exception e) { // Should catch everything in scheduleTask. Otherwise thread dies without notice.
            logger.error("Caught: " + e, e);
        }
    }

    private List<String> scanContractorFolderForReadySips(String short_name, long currentTimeStamp) {
        List<String> childrenWhichAreReadyFiles = new ArrayList<String>();
        List<String> childrenWhichAreReadyDirs = new ArrayList<String>();
        List<String> childrenWhichAreReadySips = new ArrayList<String>();
        childrenWhichAreReadyFiles = scanContractorFolderForReadyFiles(short_name, currentTimeStamp);
        childrenWhichAreReadyDirs = scanContractorFolderForReadyDirs(short_name, currentTimeStamp);
        if (childrenWhichAreReadyFiles != null)
            childrenWhichAreReadySips.addAll(childrenWhichAreReadyFiles);
        if (childrenWhichAreReadyDirs != null)
            childrenWhichAreReadySips.addAll(childrenWhichAreReadyDirs);
        return childrenWhichAreReadySips;
    }

    /**
     * Insert job into queue.
     *
     * @param contractorShortName the contractor short name
     * @param origName the orig name
     * @param responsibleNodeName the initial node name
     * @return the job
     */
    private Job insertJobIntoQueueAndSetWorkFlowState(User c, String origName, String responsibleNodeId,
            Object object) {

        object.setObject_state(Object.ObjectStatus.InWorkflow);

        Session session = HibernateUtil.openSession();
        session.beginTransaction();
        session.saveOrUpdate(object);

        Node node = (Node) session.get(Node.class, Integer.parseInt(responsibleNodeId));

        Job job = new Job();
        job.setObject(object);

        job.setStatus(C.WORKFLOW_STATUS_START___INGEST_UNPACK_ACTION);
        job.setResponsibleNodeName(node.getName());
        job.setCreatedAt(new Date());
        job.setModifiedAt(new Date());

        session.save(job);
        session.getTransaction().commit();
        session.close();

        return job;
    }

    /**
     * Gets the contractor.
     *
     * @param contractorShortName the contractor short name
     * @return null if no contractor for short name could be found
     */
    private User getContractor(Session session, String contractorShortName) {
        @SuppressWarnings("rawtypes")
        List list;
        list = session.createQuery("from User where short_name=?1")

                .setParameter("1", contractorShortName).setReadOnly(true).list();

        if (list.isEmpty())
            return null;

        return (User) list.get(0);
    }

    /**
     * Tells us which files are inside a contractor specific stage folder are ready to move.
     * Two conditions must be met in order two be moved
     * <li>The file must be old enough
     * <li>The file must pass the AcceptedContainerFormatsFilter
     *
     * @param contractorShortName the contractor short name
     * @param currentTimeStamp the current time stamp
     * @return list of files which meet the above mentioned conditions.
     */
    private List<String> scanContractorFolderForReadyFiles(String contractorShortName, long currentTimeStamp) {

        String children[] = Path.makeFile(ingestAreaRootPath, contractorShortName)
                .list(new AcceptedContainerFormatsFilter());
        List<String> childrenWhichAreReady = new ArrayList<String>();
        if (children != null) {
            for (int i = 0; i < children.length; i++) {
                if (Path.makeFile(ingestAreaRootPath, contractorShortName, children[i]).isFile())
                    childrenWhichAreReady = addToList(children[i], contractorShortName, currentTimeStamp,
                            childrenWhichAreReady);
            }
        }
        return childrenWhichAreReady;
    }

    /** Allowing uncompressed Bagits
     * @author jens peters, 2016
     */
    private List<String> scanContractorFolderForReadyDirs(String contractorShortName, long currentTimeStamp) {

        String children[] = Path.makeFile(ingestAreaRootPath, contractorShortName).list();
        List<String> childrenWhichAreReady = new ArrayList<String>();
        if (children != null) {
            for (int i = 0; i < children.length; i++) {

                if (!Path.makeFile(ingestAreaRootPath, contractorShortName, children[i]).isDirectory())
                    continue;
                String bags[] = Path.makeFile(ingestAreaRootPath, contractorShortName, children[i])
                        .list(new BagitScanner());
                if (bags != null && bags.length > 0) {
                    // we don't check for bagit consistency while UnpackAction does this and we'll get those 
                    // packs by move operation
                    logger.debug("found directory " + children[i] + " which may contain unpacked bagit");
                    childrenWhichAreReady = addToList(children[i], contractorShortName, currentTimeStamp,
                            childrenWhichAreReady);
                }
            }
        }
        return childrenWhichAreReady;
    }

    private List<String> addToList(String sipname, String csn, long currentTimeStamp,
            List<String> childrenWhichAreReady) {
        if (!sipsForIngestForCSN.get(csn).containsKey(sipname)) {
            Job job = getJob(convertMaskedSlashes(FilenameUtils.removeExtension(sipname)), csn);
            if (job == null) { // consider only containers for which there is not already a job in queue since it is possible that the CB has stopped and now resumes work.
                logger.debug("New file found, making timestamp for: " + sipname);
                sipsForIngestForCSN.get(csn).put(sipname, currentTimeStamp);
            } else {
                //               USER EMAIL 
            }
        } else {
            long diff = currentTimeStamp - sipsForIngestForCSN.get(csn).get(sipname);
            logger.debug("Old file found, lets look how their timestamps differ: " + sipname + " diff: " + diff);

            if (diff > minAge) {
                logger.debug("File " + sipname + " which has lasted " + minAge + " miliseconds is ready.");
                sipsForIngestForCSN.get(csn).remove(sipname);
                childrenWhichAreReady.add(sipname);
            }
        }
        return childrenWhichAreReady;
    }

    /**
     * Gets the job.
     *
     * @param orig_name the orig_name
     * @param csn the csn
     * @return the job
     */
    @SuppressWarnings("unchecked")
    private Job getJob(String orig_name, String csn) {
        Session session = HibernateUtil.openSession();
        session.beginTransaction();
        List<Job> l = null;

        l = session.createQuery(
                "SELECT j FROM Job j left join j.obj as o left join o.user as c where o.orig_name=?1 and c.short_name=?2")
                .setParameter("1", orig_name).setParameter("2", csn).setReadOnly(true).list();
        session.close();

        try {
            return l.get(0);
        } catch (IndexOutOfBoundsException e) {
            logger.debug("search for a job with orig_name " + orig_name + " for user " + csn + " returns null!");
            return null;
        }
    }

    // TODO factor out
    /**
     * Replaces %2F inside a string to /.
     *
     * @param input the input
     * @return the string
     */
    String convertMaskedSlashes(String input) {
        return input.replaceAll("%2F", "/");
    }

    /**
     */
    public Path getIngestAreaRootPath() {
        return ingestAreaRootPath;
    }

    /**
     */
    public void setIngestAreaRootPath(Path ingestAreaRootPath) {
        this.ingestAreaRootPath = ingestAreaRootPath;
    }

    /**
     * Sets the min age.
     *
     * @param minAge the new min age
     */
    public void setMinAge(int minAge) {
        this.minAge = minAge;
    }

    /**
     * Gets the min age.
     *
     * @return the min age
     */
    public int getMinAge() {
        return minAge;
    }

    /**
     * Gets the local node name.
     *
     * @return the local node name
     */
    public String getLocalNodeId() {
        return localNodeId;
    }

    /**
     * Sets the local node name.
     *
     * @param localNodeName the new local node name
     */
    public void setLocalNodeId(String localNodeId) {
        this.localNodeId = localNodeId;
    }

    /**
     * Gets the register object service.
     *
     * @return the register object service
     */
    public RegisterObjectService getRegisterObjectService() {
        return registerObjectService;
    }

    /**
     * Sets the register object service.
     *
     * @param registerObjectService the new register object service
     */
    public void setRegisterObjectService(RegisterObjectService registerObjectService) {
        this.registerObjectService = registerObjectService;
    }

}