Java tutorial
/**
 * Copyright (C) 2013 Seajas, the Netherlands.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package com.seajas.search.profiler.task;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;

import com.seajas.search.bridge.jms.integration.GroupIdDecorator;
import com.seajas.search.bridge.profiler.model.feed.Feed;
import com.seajas.search.bridge.profiler.model.feed.FeedResultParameter;
import com.seajas.search.bridge.profiler.model.feed.FeedUrl;
import com.seajas.search.profiler.authentication.strategy.AuthenticationResult;
import com.seajas.search.profiler.jms.service.InjectionService;
import com.seajas.search.profiler.service.profiler.ProfilerService;

/**
 * Feed injection task.
 *
 * <p>
 * Collects the feeds that are due for a given trigger and hands every one of their URLs to the
 * {@link InjectionService}, applying the feed's optional anonymization settings (run window, delays,
 * user agents) and optional authentication strategy along the way.
 *
 * @author Jasper van Veghel <jasper@seajas.com>
 */
@Component
public class FeedInjectionTask implements InjectionTask {
    /**
     * The logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(FeedInjectionTask.class);

    /**
     * The random minimum.
     */
    private static final Integer RANDOM_RANGE_MINIMUM = 0;

    /**
     * The default trigger.
     */
    private static final String TRIGGER_NAME_DEFAULT = "default";

    /**
     * The host name prefix that is stripped before the host name is used for grouping.
     */
    private static final String WWW_PREFIX = "www.";

    /**
     * The host name used when the feed URI does not carry one.
     */
    private static final String DEFAULT_HOSTNAME = "localhost";

    /**
     * The random number generator for random delays and user agents.
     */
    private static final Random randomGenerator = new Random();

    /**
     * The profiler service.
     */
    @Autowired
    private ProfilerService profilerService;

    /**
     * The injection service.
     */
    @Autowired
    private InjectionService injectionService;

    /**
     * The result mapping parameter.
     */
    @Value("${profiler.project.result.mapping.parameter}")
    private String resultMappingParameter;

    /**
     * The result mapping header.
     */
    @Value("${profiler.project.result.mapping.header}")
    private String resultMappingHeader;

    /**
     * The default user agent.
     */
    @Value("${profiler.project.anonymization.default.http.user.agent}")
    private String defaultUserAgent;

    /**
     * {@inheritDoc}
     *
     * <p>
     * The injectable feeds are always retrieved (and their number reported) first; injection itself is
     * skipped entirely when the system property {@code profiler.indexing.disabled} is {@code true}.
     * The interruption flag is checked before every feed and before every feed URL.
     *
     * @param triggerName the name of the trigger this run belongs to
     * @param intervalTotal the total interval in seconds when doing distributed injection, or
     *        {@code null} otherwise
     * @param interrupted the handle used to find out whether the job has been interrupted
     */
    @Override
    public void inject(final String triggerName, final Long intervalTotal, final InjectionJobInterrupted interrupted) {
        Date currentTime = new Date();

        List<Feed> enabledFeeds = getInjectableFeeds(triggerName, intervalTotal, currentTime);

        // Only log when we're not doing distributed injection - so to not get this every second
        if (logger.isInfoEnabled() && intervalTotal == null) {
            logger.info("Performing feed injection under trigger '" + triggerName + "' (" + enabledFeeds.size() + " feed"
                    + (enabledFeeds.size() != 1 ? "s" : "") + ")");
        }

        // We report on the number of feeds first, and then move on to potentially canceling the operation
        if (Boolean.getBoolean("profiler.indexing.disabled")) {
            logger.info("Indexing has been explicitly disabled using 'profiler.indexing.disabled=true'. Skipping injection.");

            return;
        }

        for (Feed feed : enabledFeeds) {
            if (interrupted.isInterrupted()) {
                logger.warn("This job was interrupted - not continuing with feed injection");

                break;
            }

            // Mark this feed's injection date (ahead of the actual injection - but then this is not such a critical matter)
            profilerService.updateFeedLastInjected(feed.getId(), currentTime);

            if (!isWithinAnonymizationWindow(feed)) {
                logger.info("The feed with name '{}' falls outside of its anonymization run from/until date - skipping",
                        feed.getName());

                continue;
            }

            Map<String, String> resultParameters = buildResultParameters(feed);

            // A feed without any URLs has nothing to inject
            if (feed.getFeedUrls() == null) {
                continue;
            }

            for (FeedUrl feedUrl : feed.getFeedUrls()) {
                if (interrupted.isInterrupted()) {
                    logger.warn("This job was interrupted - not continuing with feed injection");

                    break;
                }

                injectFeedUrl(feed, feedUrl, resultParameters);
            }
        }

        logger.trace("Finished feed injection for trigger '{}'", triggerName);
    }

    /**
     * Determine whether the current time falls within the feed's anonymization run from / until range.
     *
     * @param feed the feed to inspect
     * @return {@code true} when the feed has no anonymization settings or the current time lies within
     *         the (optionally open-ended) range, {@code false} otherwise
     */
    private boolean isWithinAnonymizationWindow(final Feed feed) {
        if (feed.getFeedAnonymization() == null) {
            return true;
        }

        Date currentDate = Calendar.getInstance().getTime();
        Date runFrom = feed.getFeedAnonymization().getRunFrom();
        Date runUntil = feed.getFeedAnonymization().getRunUntil();

        boolean startsLater = runFrom != null && currentDate.before(runFrom);
        boolean alreadyEnded = runUntil != null && currentDate.after(runUntil);

        return !startsLater && !alreadyEnded;
    }

    /**
     * Create a map out of the feed result parameters; later parameters overwrite earlier ones that
     * share the same field name (a warning is logged when that happens).
     *
     * @param feed the feed whose result parameters should be mapped
     * @return a new, possibly empty, map of field name to field value
     */
    private Map<String, String> buildResultParameters(final Feed feed) {
        Map<String, String> resultParameters = new HashMap<String, String>();

        if (feed.getFeedResultParameters() == null) {
            return resultParameters;
        }

        for (FeedResultParameter feedResultParameter : feed.getFeedResultParameters()) {
            if (resultParameters.containsKey(feedResultParameter.getFieldName())) {
                logger.warn("The result map already contains a parameter named '{}' - it will be overwritten",
                        feedResultParameter.getFieldName());
            }

            resultParameters.put(feedResultParameter.getFieldName(), feedResultParameter.getFieldValue());
        }

        return resultParameters;
    }

    /**
     * Inject a single feed URL into the bridge layer. An invalid URI is logged and skipped so that the
     * remaining URLs are still processed.
     *
     * @param feed the feed the URL belongs to
     * @param feedUrl the URL to inject
     * @param resultParameters the result parameters of the feed, before authentication is applied
     */
    private void injectFeedUrl(final Feed feed, final FeedUrl feedUrl, final Map<String, String> resultParameters) {
        try {
            Long feedDelay = determineFeedDelay(feed);

            // Don't randomize the result delay here, but instead do this on the contender-side
            Long resultDelay = feed.getFeedAnonymization() != null && feed.getFeedAnonymization().getFeedElementDelay() != null
                    ? feed.getFeedAnonymization().getFeedElementDelay() : 0L;

            // Perform the optional authentication step
            String authenticatedUrl = feedUrl.getUrl();
            Map<String, String> authenticatedResultParameters = resultParameters;

            if (hasAuthenticationStrategy(feed)) {
                AuthenticationResult authenticationResult = profilerService.applyAuthenticationStrategy(
                        feed.getFeedConnection().getAuthenticationStrategy(), authenticatedUrl, authenticatedResultParameters);

                authenticatedUrl = authenticationResult.getUrl();
                authenticatedResultParameters = authenticationResult.getResultParameters();
            }

            URI uri = new URI(authenticatedUrl);
            String hostname = determineHostname(uri);

            logger.info("Injecting feed with name '{}' and URI '{}' into bridge layer", feed.getName(), uri);

            // Inject it into the queue
            com.seajas.search.bridge.jms.model.Feed resultFeed = new com.seajas.search.bridge.jms.model.Feed();

            resultFeed.setUri(uri);
            resultFeed.setId(feed.getId());
            resultFeed.setName(feed.getName());
            resultFeed.setHostname(hostname);
            resultFeed.setCollection(feed.getCollection());
            resultFeed.setFeedEncodingOverride(feed.getFeedEncodingOverride());
            resultFeed.setResultEncodingOverride(feed.getResultEncodingOverride());
            resultFeed.setLanguageOverride(feed.getLanguage());
            resultFeed.setSummaryBased(feed.getIsSummaryBased());
            resultFeed.setDelay(feedDelay);
            resultFeed.setElementDelay(resultDelay);
            resultFeed.setElementDelayRandomized(feed.getFeedAnonymization() != null
                    && Boolean.TRUE.equals(feed.getFeedAnonymization().getIsFeedElementDelayRandomized()));
            resultFeed.setUserAgent(determineUserAgent(feed));
            resultFeed.setUserAgentsOverride(
                    feed.getFeedAnonymization() != null ? feed.getFeedAnonymization().getPreferredUserAgents() : null);
            resultFeed.setUserAgentsOverrideRandomized(feed.getFeedAnonymization() != null
                    && Boolean.TRUE.equals(feed.getFeedAnonymization().getIsPreferredUserAgentsRandomized()));
            resultFeed.setResultParameters(authenticatedResultParameters);
            resultFeed.setRetrievalRequestHeaders(determineResultHeaders(authenticatedResultParameters));

            injectionService.inject(resultFeed, GroupIdDecorator.decorate(hostname), feedDelay);
        } catch (URISyntaxException e) {
            // The original (pre-authentication) URL is reported, hence the "(unauthenticated)" marker
            logger.error("The given " + (hasAuthenticationStrategy(feed) ? "(unauthenticated) " : "") + "feed URI '"
                    + feedUrl.getUrl() + "' is not valid", e);
        }
    }

    /**
     * Determine whether the given feed has a connection with an authentication strategy.
     *
     * @param feed the feed to inspect
     * @return {@code true} when an authentication strategy is present
     */
    private boolean hasAuthenticationStrategy(final Feed feed) {
        return feed.getFeedConnection() != null && feed.getFeedConnection().getAuthenticationStrategy() != null;
    }

    /**
     * Determine the feed delay from the feed's anonymization settings, randomizing it between
     * {@link #RANDOM_RANGE_MINIMUM} and the configured delay (inclusive) when so requested.
     *
     * @param feed the feed to inspect
     * @return the configured or randomized delay, or {@code 0} when none is configured
     */
    private Long determineFeedDelay(final Feed feed) {
        if (feed.getFeedAnonymization() == null || feed.getFeedAnonymization().getFeedDelay() == null) {
            return 0L;
        }

        long feedDelay = feed.getFeedAnonymization().getFeedDelay();

        // Random.nextInt requires a strictly positive int bound: skip non-positive delays (nothing to
        // randomize) and clamp so that "upper + 1" cannot overflow. A null flag counts as "not randomized".
        if (feedDelay > 0 && Boolean.TRUE.equals(feed.getFeedAnonymization().getIsFeedDelayRandomized())) {
            int upperBound = (int) Math.min(feedDelay, Integer.MAX_VALUE - 1L);

            feedDelay = RANDOM_RANGE_MINIMUM + randomGenerator.nextInt(upperBound - RANDOM_RANGE_MINIMUM + 1);
        }

        return feedDelay;
    }

    /**
     * Determine the host name of the given URI, without a leading {@code www.} prefix.
     *
     * @param uri the URI to take the host from
     * @return the host name, or {@code localhost} when the URI has no host
     */
    private static String determineHostname(final URI uri) {
        String host = uri.getHost();

        if (!StringUtils.hasText(host)) {
            return DEFAULT_HOSTNAME;
        }

        // Only strip a leading prefix - "www." elsewhere in the name is part of the actual host
        return host.startsWith(WWW_PREFIX) ? host.substring(WWW_PREFIX.length()) : host;
    }

    /**
     * Determine which of the result parameters to turn into retrieval request headers.
     *
     * @param parameters the (authenticated) result parameters
     * @return a single-entry header map, or {@code null} when result mapping is not configured or the
     *         mapping parameter is not present in {@code parameters}
     */
    private Map<String, String> determineResultHeaders(final Map<String, String> parameters) {
        if (!StringUtils.hasText(resultMappingParameter) || !StringUtils.hasText(resultMappingHeader)) {
            return null;
        }

        if (!parameters.containsKey(resultMappingParameter)) {
            logger.debug("Result mapping was requested, but result mapping parameter '{}' is not present in the result "
                    + "parameter map - not adding header '{}'", resultMappingParameter, resultMappingHeader);

            return null;
        }

        Map<String, String> resultHeaders = new HashMap<String, String>();

        resultHeaders.put(resultMappingHeader, parameters.get(resultMappingParameter));

        return resultHeaders;
    }

    /**
     * Determine the injectable feeds.
     *
     * <p>
     * For the default trigger all enabled feeds that have never been injected are included. When an
     * interval total is given, only feeds last injected before {@code currentTime - intervalTotal}
     * are requested, limited to {@code ceil(enabledFeedCount / intervalTotal)} results.
     *
     * @param triggerName the name of the trigger
     * @param intervalTotal the total interval in seconds, or {@code null} to retrieve all enabled feeds
     * @param currentTime the reference time of this injection run
     * @return the feeds to inject, never {@code null}
     */
    private List<Feed> getInjectableFeeds(final String triggerName, final Long intervalTotal, final Date currentTime) {
        List<Feed> enabledFeeds = new ArrayList<Feed>();

        // If this is the default trigger, we add all un-injected enabled feeds
        if (TRIGGER_NAME_DEFAULT.equals(triggerName)) {
            List<Feed> uninjectedFeeds = profilerService.getEnabledUninjectedFeeds();

            if (uninjectedFeeds.size() > 0) {
                logger.info("Injecting {} additional, previously un-injected, feeds", uninjectedFeeds.size());

                enabledFeeds.addAll(uninjectedFeeds);
            }
        }

        // Add all enabled feeds for this trigger
        Integer enabledFeedCount = profilerService.getEnabledFeedCount(triggerName);

        Date injectedBefore = null;
        Long maximumResults = null;

        if (intervalTotal != null) {
            injectedBefore = new Date(currentTime.getTime() - (intervalTotal * 1000));
            maximumResults = (long) Math.ceil((double) enabledFeedCount / (double) intervalTotal);
        }

        int previousSize = enabledFeeds.size();

        enabledFeeds.addAll(profilerService.getEnabledFeeds(triggerName, injectedBefore, maximumResults));

        // Report the number of feeds actually retrieved for this interval, not the requested maximum
        int retrievedCount = enabledFeeds.size() - previousSize;

        if (maximumResults != null && retrievedCount > 0 && logger.isInfoEnabled()) {
            logger.info("Retrieved " + retrievedCount + " feeds in this interval - across a total interval of " + intervalTotal
                    + " seconds and " + enabledFeedCount + " feeds");
        }

        return enabledFeeds;
    }

    /**
     * Retrieve a user agent override from the given feed, or use the default.
     *
     * <p>
     * The first preferred user agent is used unless randomization is enabled and more than one
     * preferred user agent is available, in which case one of them is picked at random.
     *
     * @param feed the feed to inspect
     * @return the user agent to use for the feed
     */
    private String determineUserAgent(final Feed feed) {
        if (feed.getFeedAnonymization() == null) {
            return defaultUserAgent;
        }

        List<String> preferredUserAgents = feed.getFeedAnonymization().getPreferredUserAgents();

        if (preferredUserAgents == null || preferredUserAgents.isEmpty()) {
            return defaultUserAgent;
        }

        // A null randomization flag counts as "not randomized"
        if (preferredUserAgents.size() > 1
                && Boolean.TRUE.equals(feed.getFeedAnonymization().getIsPreferredUserAgentsRandomized())) {
            return preferredUserAgents.get(randomGenerator.nextInt(preferredUserAgents.size()));
        }

        return preferredUserAgents.get(0);
    }
}