com.seajas.search.profiler.task.FeedInjectionTask.java Source code

Java tutorial

Introduction

Here is the source code for com.seajas.search.profiler.task.FeedInjectionTask.java

Source

/**
 * Copyright (C) 2013 Seajas, the Netherlands.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package com.seajas.search.profiler.task;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;

import com.seajas.search.bridge.jms.integration.GroupIdDecorator;
import com.seajas.search.bridge.profiler.model.feed.Feed;
import com.seajas.search.bridge.profiler.model.feed.FeedResultParameter;
import com.seajas.search.bridge.profiler.model.feed.FeedUrl;
import com.seajas.search.profiler.authentication.strategy.AuthenticationResult;
import com.seajas.search.profiler.jms.service.InjectionService;
import com.seajas.search.profiler.service.profiler.ProfilerService;

/**
 * Feed injection task.
 * <p>
 * Retrieves the injectable feeds for a trigger from the {@link ProfilerService} and hands every URL of every such
 * feed to the {@link InjectionService}, together with the feed's optional anonymization settings (delays and user
 * agents), its result parameters and - when configured - a retrieval request header derived from one of those
 * parameters.
 * 
 * @author Jasper van Veghel <jasper@seajas.com>
 */
@Component
public class FeedInjectionTask implements InjectionTask {
    /**
     * The logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(FeedInjectionTask.class);

    /**
     * The random minimum (inclusive lower bound of a randomized feed delay).
     */
    private static final Integer RANDOM_RANGE_MINIMUM = 0;

    /**
     * The default trigger.
     */
    private static final String TRIGGER_NAME_DEFAULT = "default";

    /**
     * The host name prefix that is stripped when deriving a feed's host name.
     */
    private static final String HOSTNAME_PREFIX_WWW = "www.";

    /**
     * The host name that is used when a feed URI does not carry a host.
     */
    private static final String HOSTNAME_FALLBACK = "localhost";

    /**
     * The random number generator for random delays and user agents.
     */
    private static final Random randomGenerator = new Random();

    /**
     * The profiler service.
     */
    @Autowired
    private ProfilerService profilerService;

    /**
     * The injection service.
     */
    @Autowired
    private InjectionService injectionService;

    /**
     * The result mapping parameter.
     */
    @Value("${profiler.project.result.mapping.parameter}")
    private String resultMappingParameter;

    /**
     * The result mapping header.
     */
    @Value("${profiler.project.result.mapping.header}")
    private String resultMappingHeader;

    /**
     * The default user agent.
     */
    @Value("${profiler.project.anonymization.default.http.user.agent}")
    private String defaultUserAgent;

    /**
     * {@inheritDoc}
     */
    @Override
    public void inject(final String triggerName, final Long intervalTotal,
            final InjectionJobInterrupted interrupted) {
        Date currentTime = new Date();

        List<Feed> enabledFeeds = getInjectableFeeds(triggerName, intervalTotal, currentTime);

        // Only log when we're not doing distributed injection - so to not get this every second

        if (logger.isInfoEnabled() && intervalTotal == null)
            logger.info("Performing feed injection under trigger '" + triggerName + "' (" + enabledFeeds.size()
                    + " feed" + (enabledFeeds.size() != 1 ? "s" : "") + ")");

        // We report on the number of feeds first, and then move on to potentially canceling the operation

        if (Boolean.getBoolean("profiler.indexing.disabled")) {
            if (logger.isInfoEnabled())
                logger.info(
                        "Indexing has been explicitly disabled using 'profiler.indexing.disabled=true'. Skipping injection.");

            return;
        }

        for (Feed feed : enabledFeeds) {
            if (interrupted.isInterrupted()) {
                logger.warn("This job was interrupted - not continuing with feed injection");

                break;
            }

            // Mark this feed's injection date (ahead of the actual injection - but then this is not such a critical matter)

            profilerService.updateFeedLastInjected(feed.getId(), currentTime);

            // Determine whether this feed falls within the anonymization run from / until range

            if (isOutsideAnonymizationWindow(feed)) {
                logger.info("The feed with name '" + feed.getName()
                        + "' falls outside of its anonymization run from/until date - skipping");

                continue;
            }

            Map<String, String> resultParameters = collectResultParameters(feed);

            // A feed without any URLs has nothing to inject

            if (feed.getFeedUrls() == null)
                continue;

            // Now inject each feed URL

            for (FeedUrl feedUrl : feed.getFeedUrls()) {
                if (interrupted.isInterrupted()) {
                    logger.warn("This job was interrupted - not continuing with feed injection");

                    break;
                }

                injectFeedUrl(feed, feedUrl, resultParameters);
            }
        }

        if (logger.isTraceEnabled())
            logger.trace("Finished feed injection for trigger '" + triggerName + "'");
    }

    /**
     * Determine whether the current date lies before the feed's anonymization run-from date or after its run-until
     * date. Feeds without anonymization settings, and bounds that are not set, never exclude a feed.
     *
     * @param feed
     * @return boolean
     */
    private boolean isOutsideAnonymizationWindow(final Feed feed) {
        if (feed.getFeedAnonymization() == null)
            return false;

        Date currentDate = Calendar.getInstance().getTime();
        Date runFrom = feed.getFeedAnonymization().getRunFrom();
        Date runUntil = feed.getFeedAnonymization().getRunUntil();

        if (runFrom != null && currentDate.before(runFrom))
            return true;

        return runUntil != null && currentDate.after(runUntil);
    }

    /**
     * Create a map out of the feed result parameters, keyed by field name. When a field name occurs more than once
     * a warning is logged and the last value wins.
     *
     * @param feed
     * @return Map<String, String> (never null, possibly empty)
     */
    private Map<String, String> collectResultParameters(final Feed feed) {
        Map<String, String> resultParameters = new HashMap<String, String>();

        if (feed.getFeedResultParameters() == null)
            return resultParameters;

        for (FeedResultParameter feedResultParameter : feed.getFeedResultParameters()) {
            if (resultParameters.containsKey(feedResultParameter.getFieldName()))
                logger.warn("The result map already contains a parameter named '"
                        + feedResultParameter.getFieldName() + "' - it will be overwritten");

            resultParameters.put(feedResultParameter.getFieldName(), feedResultParameter.getFieldValue());
        }

        return resultParameters;
    }

    /**
     * Inject a single feed URL: apply the optional authentication strategy, derive the host name and result
     * headers, populate the bridge feed and hand it to the injection service. An invalid feed URI is logged and
     * skipped, so that the remaining URLs can still be injected.
     *
     * @param feed
     * @param feedUrl
     * @param resultParameters
     */
    private void injectFeedUrl(final Feed feed, final FeedUrl feedUrl, final Map<String, String> resultParameters) {
        try {
            // Determine the feed-URL specific anonymization settings

            Long feedDelay = determineFeedDelay(feed);

            // Don't randomize the result delay here, but instead do this on the contender-side

            Long resultDelay = feed.getFeedAnonymization() != null
                    && feed.getFeedAnonymization().getFeedElementDelay() != null
                            ? feed.getFeedAnonymization().getFeedElementDelay()
                            : 0L;

            // Perform the optional authentication step

            String authenticatedUrl = feedUrl.getUrl();
            Map<String, String> authenticatedResultParameters = resultParameters;

            if (feed.getFeedConnection() != null && feed.getFeedConnection().getAuthenticationStrategy() != null) {
                AuthenticationResult authenticationResult = profilerService.applyAuthenticationStrategy(
                        feed.getFeedConnection().getAuthenticationStrategy(), authenticatedUrl,
                        authenticatedResultParameters);

                authenticatedUrl = authenticationResult.getUrl();
                authenticatedResultParameters = authenticationResult.getResultParameters();
            }

            URI uri = new URI(authenticatedUrl);
            String hostname = determineHostname(uri);

            logger.info("Injecting feed with name '" + feed.getName() + "' and URI '" + uri
                    + "' into bridge layer");

            // Determine which of the result parameters to turn into headers

            Map<String, String> resultHeaders = determineResultHeaders(authenticatedResultParameters);

            // Inject it into the queue

            com.seajas.search.bridge.jms.model.Feed resultFeed = new com.seajas.search.bridge.jms.model.Feed();

            resultFeed.setUri(uri);
            resultFeed.setId(feed.getId());
            resultFeed.setName(feed.getName());
            resultFeed.setHostname(hostname);
            resultFeed.setCollection(feed.getCollection());

            resultFeed.setFeedEncodingOverride(feed.getFeedEncodingOverride());
            resultFeed.setResultEncodingOverride(feed.getResultEncodingOverride());
            resultFeed.setLanguageOverride(feed.getLanguage());
            resultFeed.setSummaryBased(feed.getIsSummaryBased());

            resultFeed.setDelay(feedDelay);
            resultFeed.setElementDelay(resultDelay);
            resultFeed.setElementDelayRandomized(feed.getFeedAnonymization() != null
                    && Boolean.TRUE.equals(feed.getFeedAnonymization().getIsFeedElementDelayRandomized()));

            resultFeed.setUserAgent(determineUserAgent(feed));
            resultFeed.setUserAgentsOverride(feed.getFeedAnonymization() != null
                    ? feed.getFeedAnonymization().getPreferredUserAgents()
                    : null);
            resultFeed.setUserAgentsOverrideRandomized(feed.getFeedAnonymization() != null && Boolean.TRUE
                    .equals(feed.getFeedAnonymization().getIsPreferredUserAgentsRandomized()));

            resultFeed.setResultParameters(authenticatedResultParameters);
            resultFeed.setRetrievalRequestHeaders(resultHeaders);

            injectionService.inject(resultFeed, GroupIdDecorator.decorate(hostname), feedDelay);
        } catch (URISyntaxException e) {
            // Pass the exception along, so that the reason and position of the syntax error end up in the log

            logger.error("The given " + (feed.getFeedConnection() != null
                    && feed.getFeedConnection().getAuthenticationStrategy() != null ? "(unauthenticated) "
                            : "")
                    + "feed URI '" + feedUrl.getUrl() + "' is not valid", e);
        }
    }

    /**
     * Determine the delay to apply to the given feed. Without anonymization settings or without a configured delay
     * this is 0. When the delay is marked as randomized and positive, a random value between
     * {@link #RANDOM_RANGE_MINIMUM} and the configured delay (both inclusive) is drawn instead.
     * <p>
     * A randomized flag that is not set counts as 'not randomized', and a non-positive delay is passed on as-is
     * rather than being offered to the random generator, which rejects non-positive bounds.
     *
     * @param feed
     * @return long
     */
    private long determineFeedDelay(final Feed feed) {
        if (feed.getFeedAnonymization() == null || feed.getFeedAnonymization().getFeedDelay() == null)
            return 0L;

        long configuredDelay = feed.getFeedAnonymization().getFeedDelay();

        if (configuredDelay <= 0 || !Boolean.TRUE.equals(feed.getFeedAnonymization().getIsFeedDelayRandomized()))
            return configuredDelay;

        // Cap the upper bound so that adding one for inclusiveness cannot overflow the integer range

        int upperBound = (int) Math.min(configuredDelay, (long) Integer.MAX_VALUE - 1);

        return randomGenerator.nextInt(upperBound - RANDOM_RANGE_MINIMUM + 1) + RANDOM_RANGE_MINIMUM;
    }

    /**
     * Determine the host name for the given URI: its host without a leading "www.", or "localhost" when the URI
     * carries no host. Only a leading prefix is removed - a "www." elsewhere in the host name is left alone.
     *
     * @param uri
     * @return String
     */
    private static String determineHostname(final URI uri) {
        String host = uri.getHost();

        if (!StringUtils.hasText(host))
            return HOSTNAME_FALLBACK;

        return host.startsWith(HOSTNAME_PREFIX_WWW) ? host.substring(HOSTNAME_PREFIX_WWW.length()) : host;
    }

    /**
     * Determine the retrieval request headers: when both the result mapping parameter and header are configured
     * and the parameter is present in the given map, its value is exposed under the configured header name.
     *
     * @param resultParameters
     * @return Map<String, String> (null when no header is to be added)
     */
    private Map<String, String> determineResultHeaders(final Map<String, String> resultParameters) {
        if (!StringUtils.hasText(resultMappingParameter) || !StringUtils.hasText(resultMappingHeader))
            return null;

        if (!resultParameters.containsKey(resultMappingParameter)) {
            if (logger.isDebugEnabled())
                logger.debug("Result mapping was requested, but result mapping parameter '"
                        + resultMappingParameter
                        + "' is not present in the result parameter map - not adding header '"
                        + resultMappingHeader + "'");

            return null;
        }

        Map<String, String> resultHeaders = new HashMap<String, String>();

        resultHeaders.put(resultMappingHeader, resultParameters.get(resultMappingParameter));

        return resultHeaders;
    }

    /**
     * Determine the injectable feeds.
     * <p>
     * For the default trigger all enabled feeds that were never injected are included. On top of that the enabled
     * feeds for the given trigger are added; when an interval is given, only feeds last injected before
     * (current time - interval) are requested, limited to ceil(enabled feed count / interval) results.
     *
     * @param triggerName
     * @param intervalTotal the total interval in seconds, or null to retrieve all enabled feeds for the trigger
     * @param currentTime
     * @return List<Feed>
     */
    private List<Feed> getInjectableFeeds(final String triggerName, final Long intervalTotal,
            final Date currentTime) {
        List<Feed> enabledFeeds = new ArrayList<Feed>();

        // If this is the default trigger, we add all un-injected enabled feeds

        if (TRIGGER_NAME_DEFAULT.equals(triggerName)) {
            List<Feed> uninjectedFeeds = profilerService.getEnabledUninjectedFeeds();

            if (uninjectedFeeds.size() > 0) {
                if (logger.isInfoEnabled())
                    logger.info(String.format("Injecting %d additional, previously un-injected, feeds",
                            uninjectedFeeds.size()));

                enabledFeeds.addAll(uninjectedFeeds);
            }
        }

        // Add all enabled feeds for this trigger

        Integer enabledFeedCount = profilerService.getEnabledFeedCount(triggerName);

        Long injectedBefore = intervalTotal != null ? currentTime.getTime() - (intervalTotal * 1000) : null;
        Long maximumResults = intervalTotal != null
                ? (long) Math.ceil((double) enabledFeedCount / (double) intervalTotal)
                : null;

        enabledFeeds.addAll(profilerService.getEnabledFeeds(triggerName,
                injectedBefore != null ? new Date(injectedBefore) : null, maximumResults));

        if (maximumResults != null && logger.isInfoEnabled() && enabledFeeds.size() > 0)
            logger.info("Retrieved " + maximumResults + " feeds in this interval - across a total interval of "
                    + intervalTotal + " seconds and " + enabledFeedCount + " feeds");
        return enabledFeeds;
    }

    /**
     * Retrieve a user agent override from the given feed, or use the default.
     * <p>
     * Without preferred user agents the default user agent is returned. Otherwise the first preferred user agent
     * is used, unless there is more than one and randomization is switched on - in which case one of them is
     * picked at random. A randomized flag that is not set counts as 'not randomized'.
     *
     * @param feed
     * @return String
     */
    private String determineUserAgent(final Feed feed) {
        if (feed.getFeedAnonymization() == null
                || feed.getFeedAnonymization().getPreferredUserAgents() == null
                || feed.getFeedAnonymization().getPreferredUserAgents().size() == 0)
            return defaultUserAgent;

        int userAgentCount = feed.getFeedAnonymization().getPreferredUserAgents().size();
        int userAgentIndex = 0;

        if (userAgentCount > 1
                && Boolean.TRUE.equals(feed.getFeedAnonymization().getIsPreferredUserAgentsRandomized()))
            userAgentIndex = randomGenerator.nextInt(userAgentCount);

        return feed.getFeedAnonymization().getPreferredUserAgents().get(userAgentIndex);
    }
}