ch.entwine.weblounge.bridge.oaipmh.WebloungeHarvester.java Source code

Java tutorial

Introduction

Here is the source code for ch.entwine.weblounge.bridge.oaipmh.WebloungeHarvester.java

Source

/*
 *  Weblounge: Web Content Management System
 *  Copyright (c) 2011 The Weblounge Team
 *  http://weblounge.o2it.ch
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software Foundation
 *  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

package ch.entwine.weblounge.bridge.oaipmh;

import static org.opencastproject.util.data.Option.some;

import ch.entwine.weblounge.bridge.oaipmh.harvester.ListRecordsResponse;
import ch.entwine.weblounge.bridge.oaipmh.harvester.OaiPmhRepositoryClient;
import ch.entwine.weblounge.bridge.oaipmh.harvester.RecordHandler;
import ch.entwine.weblounge.common.content.SearchQuery;
import ch.entwine.weblounge.common.content.SearchQuery.Order;
import ch.entwine.weblounge.common.content.SearchResult;
import ch.entwine.weblounge.common.content.movie.MovieResource;
import ch.entwine.weblounge.common.impl.content.SearchQueryImpl;
import ch.entwine.weblounge.common.impl.content.movie.MovieResourceSearchResultItemImpl;
import ch.entwine.weblounge.common.impl.security.UserImpl;
import ch.entwine.weblounge.common.repository.ContentRepositoryException;
import ch.entwine.weblounge.common.repository.WritableContentRepository;
import ch.entwine.weblounge.common.scheduler.JobException;
import ch.entwine.weblounge.common.scheduler.JobWorker;
import ch.entwine.weblounge.common.security.User;
import ch.entwine.weblounge.common.site.Site;

import org.apache.commons.lang.StringUtils;
import org.opencastproject.util.data.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;

import java.io.Serializable;
import java.lang.reflect.Constructor;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.Dictionary;

/**
 * Job worker that harvests records from an OAI-PMH repository and hands every
 * record to a configurable record handler.
 * <p>
 * The repository url, the handler class as well as the flavors and mime types
 * passed to the handler are read from the job's configuration options (see
 * the <code>OPT_*</code> constants). Harvesting resumes one second after the
 * publishing date of the most recent movie resource that was published by the
 * harvester user, or starts from the beginning if no such resource exists.
 */
public class WebloungeHarvester implements JobWorker {

    /** Logging facility */
    protected static final Logger logger = LoggerFactory.getLogger(WebloungeHarvester.class);

    /** Configuration option for the repository to harvest */
    public static final String OPT_REPOSITORY_URL = "repository.url";

    /** Configuration option for the record handler class to instantiate */
    public static final String OPT_HANDLER_CLASS = "handler.class";

    /**
     * Configuration option for the flavor of the presentation tracks to use.
     * The misspelled constant name is kept for backwards compatibility.
     */
    public static final String OPT_PRSENTATION_TRACK_FLAVORS = "presentation-track-flavor";

    /** Configuration option for the flavor of the presenter tracks to use */
    public static final String OPT_PRESENTER_TRACK_FLAVORS = "presenter-track-flavor";

    /** Configuration option for the flavor of the dublin core episode to use */
    public static final String OPT_EPISODE_DC_FLAVORS = "episode-dublincore-flavor";

    /** Configuration option for the flavor of the dublin core series to use */
    public static final String OPT_SERIES_DC_FLAVORS = "series-dublincore-flavor";

    /** Configuration option for the mime-types to use */
    public static final String OPT_MIMETYPES = "mime-types";

    /** Offset added to the last publishing date so that it is not harvested again */
    private static final long HARVEST_OFFSET_MS = 1000L;

    /**
     * Validates the job configuration, instantiates the configured record
     * handler and harvests the configured repository. Errors that occur while
     * harvesting are logged and do not fail the job.
     * 
     * @param name
     *            the job name, also used as the login of the harvester user
     * @param ctx
     *            the job context, containing the site (keyed by the
     *            <code>Site</code> class name) and the configuration options
     * @throws JobException
     *             if the site is missing, its content repository is read only,
     *             a configuration option is missing or the repository url is
     *             malformed
     * @throws IllegalStateException
     *             if the record handler cannot be instantiated
     * @see ch.entwine.weblounge.common.scheduler.JobWorker#execute(java.lang.String,
     *      java.util.Dictionary)
     */
    public void execute(String name, Dictionary<String, Serializable> ctx) throws JobException {

        Site site = (Site) ctx.get(Site.class.getName());
        if (site == null) {
            throw new JobException(this, "Site is missing from the job context");
        }

        // Get hold of the content repository
        if (site.getContentRepository().isReadOnly()) {
            throw new JobException(this, "Content repository of site '" + site + "' is read only");
        }
        WritableContentRepository contentRepository = (WritableContentRepository) site.getContentRepository();

        // Read the configuration value for the repository url
        String repositoryUrl = requireOption(ctx, OPT_REPOSITORY_URL);

        // Make sure the url is well formed
        URL url;
        try {
            url = new URL(repositoryUrl);
        } catch (MalformedURLException e) {
            throw new JobException(this, "Repository url '" + repositoryUrl + "' is malformed: " + e.getMessage());
        }

        // Read the configuration values for the flavors and mime types
        String presentationTrackFlavor = requireOption(ctx, OPT_PRSENTATION_TRACK_FLAVORS);
        String presenterTrackFlavor = requireOption(ctx, OPT_PRESENTER_TRACK_FLAVORS);
        String dcEpisodeFlavor = requireOption(ctx, OPT_EPISODE_DC_FLAVORS);
        String dcSeriesFlavor = requireOption(ctx, OPT_SERIES_DC_FLAVORS);
        String mimesTypes = requireOption(ctx, OPT_MIMETYPES);

        // Read the configuration value for the handler class
        String handlerClass = requireOption(ctx, OPT_HANDLER_CLASS);

        UserImpl harvesterUser = new UserImpl(name, site.getIdentifier(), "Harvester");

        RecordHandler handler;
        try {
            // The cast cannot be verified at this point because of erasure. A
            // class of the wrong type is detected when the instance is created.
            @SuppressWarnings("unchecked")
            Class<? extends AbstractWebloungeRecordHandler> c = (Class<? extends AbstractWebloungeRecordHandler>) Thread
                    .currentThread().getContextClassLoader().loadClass(handlerClass);
            Constructor<? extends AbstractWebloungeRecordHandler> constructor = c.getConstructor(Site.class,
                    WritableContentRepository.class, User.class, String.class, String.class, String.class,
                    String.class, String.class);
            handler = constructor.newInstance(site, contentRepository, harvesterUser, presentationTrackFlavor,
                    presenterTrackFlavor, dcEpisodeFlavor, dcSeriesFlavor, mimesTypes);
        } catch (Throwable t) {
            // Throwable is caught on purpose so that linkage errors raised while
            // loading the handler class are reported with the class name as well
            throw new IllegalStateException("Unable to instantiate class " + handlerClass + ": " + t.getMessage(),
                    t);
        }

        Option<Date> harvestingDate = findHarvestingDate(site, contentRepository, harvesterUser);

        try {
            harvest(repositoryUrl, harvestingDate, handler);
        } catch (Exception e) {
            // Hand the exception over as the throwable so that the cause is
            // logged instead of being dropped as an unused format argument
            logger.warn("An error occured while harvesting " + url + ". Skipping this repository for now...", e);
        }
    }

    /**
     * Returns the value of the given configuration option.
     * 
     * @param ctx
     *            the job context
     * @param option
     *            the name of the configuration option
     * @return the non-blank option value
     * @throws JobException
     *             if the option is missing or blank
     */
    private String requireOption(Dictionary<String, Serializable> ctx, String option) throws JobException {
        String value = (String) ctx.get(option);
        if (StringUtils.isBlank(value)) {
            throw new JobException(this,
                    "Configuration option '" + option + "' is missing from the job configuration");
        }
        return value;
    }

    /**
     * Determines the date from which on records need to be harvested by looking
     * up the most recently published movie resource of the harvester user.
     * 
     * @param site
     *            the site to search
     * @param contentRepository
     *            the content repository to search
     * @param harvesterUser
     *            the user that publishes the harvested resources
     * @return the publishing date of the latest resource plus one second, or
     *         none if there is no such resource or it has no publishing date
     * @throws RuntimeException
     *             if searching the content repository fails
     */
    private Option<Date> findHarvestingDate(Site site, WritableContentRepository contentRepository,
            UserImpl harvesterUser) {
        SearchQuery q = new SearchQueryImpl(site);
        q.withTypes(MovieResource.TYPE);
        q.sortByPublishingDate(Order.Descending);
        q.withPublisher(harvesterUser);

        SearchResult searchResult;
        try {
            searchResult = contentRepository.find(q);
        } catch (ContentRepositoryException e) {
            logger.error("Error searching for resources with harvester publisher.");
            throw new RuntimeException(e);
        }

        if (searchResult.getHitCount() > 0 && searchResult.getItems().length > 0) {
            MovieResourceSearchResultItemImpl resultItem = (MovieResourceSearchResultItemImpl) searchResult
                    .getItems()[0];
            Date lastDate = resultItem.getMovieResource().getPublishFrom();
            if (lastDate != null) {
                // To not include the resources updated, 1 second is added to the
                // last update date. A new date is created so that the date
                // returned by the resource is left untouched.
                return some(new Date(lastDate.getTime() + HARVEST_OFFSET_MS));
            }
        }
        return Option.<Date>none();
    }

    /**
     * Lists the records of the repository using the handler's metadata prefix
     * and passes each of them to the handler. An error response from the
     * repository is logged only.
     * 
     * @param url
     *            the url of the repository to harvest
     * @param from
     *            the date to start harvesting from, or none to harvest all
     *            records
     * @param handler
     *            the handler that processes the records
     * @throws Exception
     *             if listing or handling the records fails
     */
    private void harvest(String url, Option<Date> from, RecordHandler handler) throws Exception {
        logger.info("Harvesting " + url + " from " + from + " on thread " + Thread.currentThread());
        OaiPmhRepositoryClient repositoryClient = OaiPmhRepositoryClient.newHarvester(url);
        ListRecordsResponse response = repositoryClient.listRecords(handler.getMetadataPrefix(), from,
                Option.<Date>none(), Option.<String>none());
        if (!response.isError()) {
            for (Node recordNode : ListRecordsResponse.getAllRecords(response, repositoryClient)) {
                handler.handle(recordNode);
            }
        } else if (response.isErrorNoRecordsMatch()) {
            logger.info("Repository returned no records.");
        } else {
            logger.error("Repository returned error code: " + response.getErrorCode().getOrElse("?"));
        }
    }

}