com.seajas.search.contender.jms.processor.TestElementProcessor.java Source code

Java tutorial

Introduction

Here is the source code for com.seajas.search.contender.jms.processor.TestElementProcessor.java

Source

/**
 * Copyright (C) 2013 Seajas, the Netherlands.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package com.seajas.search.contender.jms.processor;

import com.seajas.search.bridge.jms.model.test.TestElement;
import com.seajas.search.bridge.jms.model.test.TestResult;
import com.seajas.search.bridge.jms.model.test.TestResultImpl;
import com.seajas.search.bridge.jms.model.test.TestType;
import com.seajas.search.contender.jms.service.InjectionService;
import com.seajas.search.contender.scripting.XmlHtmlReader;
import com.seajas.search.contender.service.exploration.ExplorationService;
import com.seajas.search.contender.service.modifier.ArchiveModifierService;
import com.seajas.search.contender.service.modifier.FeedModifierService;
import com.seajas.search.contender.service.modifier.SourceElementModifierService;
import com.sun.syndication.feed.synd.SyndFeed;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.tika.io.IOUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Test element processor.
 *
 * Runs the test requested for a {@link TestElement} (exploration, deep modifier test, shallow modifier test, or a
 * plain connection test) and hands the resulting {@link TestResult} to the {@link InjectionService}.
 * 
 * @author Jasper van Veghel <jasper@seajas.com >
 */
@Service
public class TestElementProcessor {
    /**
     * The logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(TestElementProcessor.class);

    /**
     * Status text which marks a test result as successful.
     */
    private static final String RESULT_STATUS_OK = "OK";

    /**
     * Status text used when exploration yields neither a direct feed nor any indirect links.
     */
    private static final String STATUS_NO_INDIRECT_LINKS = "No links could be found within this indirect content";

    /**
     * Status text used when at least one modifier reported a failure.
     */
    private static final String STATUS_MODIFIERS_FAILED = "Not all modifiers could be successfully applied to the given content";

    /**
     * The injection service.
     */
    @Autowired
    private InjectionService injectionService;

    /**
     * The archive modifier service.
     */
    @Autowired
    private ArchiveModifierService archiveModifierService;

    /**
     * The feed modifier service.
     */
    @Autowired
    private FeedModifierService feedModifierService;

    /**
     * The exploration service.
     */
    @Autowired
    private ExplorationService explorationService;

    /**
     * The feed element modifier service.
     */
    @Autowired
    private SourceElementModifierService sourceElementModifierService;

    /**
     * Process the given test element according to the requested test type, and inject the result.
     * 
     * Any test type other than EXPLORE, DEEP or SHALLOW is handled as a plain connection test.
     * 
     * @param element the element describing what to test
     * @param testType the kind of test to run
     * @param rendezvousId the identifier passed along with the injected result
     */
    public void process(final TestElement element, final TestType testType, final String rendezvousId) {
        logger.info("Received test processing request of type {} and rendezvous ID {}", testType.name(),
                rendezvousId);

        final TestResult result;

        switch (testType) {
        case EXPLORE:
            result = processExploration(element);
            break;
        case DEEP:
            result = processDeep(element);
            break;
        case SHALLOW:
            result = processShallow(element);
            break;
        default:
            result = processConnection(element);
            break;
        }

        // Guarded so that result.isSuccess() is only evaluated when the message is actually logged
        if (logger.isInfoEnabled()) {
            logger.info(
                    "Injecting result of test processing request of type " + testType.name() + " and rendezvous ID "
                            + rendezvousId + ", which: " + (result.isSuccess() ? "succeeded" : "failed"));
        }

        injectionService.injectTestResult(result, rendezvousId);
    }

    /**
     * Explore the testing URI: first try it as a direct feed, and if that yields nothing, scan its HTML content for
     * indirectly accessible feed links.
     * 
     * In the result map a direct feed URI maps to <code>true</code> and every indirect link maps to
     * <code>false</code>.
     * 
     * @param element the element describing what to test
     * @return TestResult
     */
    private TestResult processExploration(final TestElement element) {
        Map<String, Boolean> explorationResults = new HashMap<String, Boolean>();
        String explorationStatus = STATUS_NO_INDIRECT_LINKS;

        // Try to retrieve an actual feed first

        try {
            SyndFeed directFeed = feedModifierService.getFeed(element.getTestingUri(),
                    element.getFeedEncodingOverride(), element.getPreferredUserAgent(), null, null, true);

            if (directFeed != null) {
                explorationResults.put(element.getTestingUri().toString(), true);

                explorationStatus = RESULT_STATUS_OK;
            } else {
                logger.info("Exploration URL is not a direct URL - will attempt to find indirect links instead");
            }
        } catch (Exception e) {
            // Deliberately best-effort: a failure here simply means we fall through to the indirect link scan
            logger.debug("Ignored", e);
        }

        // And if that fails, retrieve the content regularly and determine if we can scan it for indirect links

        if (explorationResults.isEmpty()) {
            // We allow the encoding to be overridden
            SourceElementModifierService.Content content = sourceElementModifierService.getContent(
                    element.getTestingUri(), element.getResultEncodingOverride(), null,
                    element.getPreferredUserAgent());

            if (content != null) {
                if (content.getContentType() == null || !content.getContentType().contains("html")) {
                    logger.error(
                            "The given document does not have a content type, or the content type is not an HTML-type");
                } else {
                    Reader reader = convertStructedContentInputStreamToReader(content.getInputStream(),
                            content.getContentType(), element.getResultEncodingOverride());

                    // The conversion returns null on failure (and has already logged the cause), so there is
                    // nothing to read or close in that case
                    if (reader != null) {
                        try {
                            List<String> indirectLinks = explorationService.getIndirectlyAccessibleFeedLinks(
                                    element.getTestingUri(), IOUtils.toString(reader));

                            if (indirectLinks != null && !indirectLinks.isEmpty()) {
                                for (String indirectLink : indirectLinks) {
                                    explorationResults.put(indirectLink, false);
                                }

                                explorationStatus = RESULT_STATUS_OK;
                            }
                        } catch (IOException e) {
                            logger.error("Could not convert the given reader to a string", e);
                        } finally {
                            try {
                                reader.close();
                            } catch (IOException e) {
                                logger.error("Unable to close the given reader", e);
                            }
                        }
                    }
                }
            }
        }

        return new TestResultImpl(RESULT_STATUS_OK.equals(explorationStatus), explorationStatus,
                explorationResults);
    }

    /**
     * Retrieve the testing feed, take a result URI from it and test the modifier against that result.
     * 
     * @param element the element describing what to test
     * @return TestResult
     */
    private TestResult processDeep(final TestElement element) {
        Map<String, Boolean> deepModifierResults = new HashMap<String, Boolean>();
        String deepStatus = RESULT_STATUS_OK;

        try {
            // Retrieve the feed first

            SyndFeed deepFeed = feedModifierService.getFeed(element.getTestingUri(),
                    element.getFeedEncodingOverride(), element.getPreferredUserAgent(), null, null, false);

            // Then determine the URL to test with, if any

            if (deepFeed == null) {
                deepStatus = "The testing feed could not be constructed";
            } else {
                URI deepResultUri = sourceElementModifierService.getResultUri(element.getTestingUri(), deepFeed);

                if (deepResultUri == null) {
                    deepStatus = "No result to test with could be taken from the testing feed";
                } else {
                    deepModifierResults.putAll(
                            sourceElementModifierService.testModifier(element.getModifierId(), deepResultUri,
                                    element.getResultEncodingOverride(), element.getPreferredUserAgent()));

                    if (!allSucceeded(deepModifierResults)) {
                        deepStatus = STATUS_MODIFIERS_FAILED;
                    }
                }
            }
        } catch (Exception e) {
            logger.warn("The deep modifier test failed", e);

            deepStatus = describeFailure(e);
        }

        return new TestResultImpl(RESULT_STATUS_OK.equals(deepStatus), deepStatus, deepModifierResults);
    }

    /**
     * Test the modifier directly against the testing URI.
     * 
     * @param element the element describing what to test
     * @return TestResult
     */
    private TestResult processShallow(final TestElement element) {
        Map<String, Boolean> shallowModifierResults = new HashMap<String, Boolean>();
        String shallowStatus = RESULT_STATUS_OK;

        try {
            shallowModifierResults
                    .putAll(feedModifierService.testModifier(element.getModifierId(), element.getTestingUri(),
                            element.getFeedEncodingOverride(), element.getPreferredUserAgent()));

            if (!allSucceeded(shallowModifierResults)) {
                shallowStatus = STATUS_MODIFIERS_FAILED;
            }
        } catch (Exception e) {
            logger.warn("The shallow modifier test failed", e);

            shallowStatus = describeFailure(e);
        }

        return new TestResultImpl(RESULT_STATUS_OK.equals(shallowStatus), shallowStatus, shallowModifierResults);
    }

    /**
     * Test whether a connection to the testing URI can be made, using the archive or the feed modifier service
     * depending on {@link TestElement#isArchive()}.
     * 
     * The status text is always {@link #RESULT_STATUS_OK}; only the success flag reflects the outcome.
     * 
     * @param element the element describing what to test
     * @return TestResult
     */
    private TestResult processConnection(final TestElement element) {
        boolean connectionSuccess;

        if (element.isArchive()) {
            connectionSuccess = archiveModifierService.testConnection(element.getTestingUri());
        } else {
            connectionSuccess = feedModifierService.testConnection(element.getTestingUri(),
                    element.getFeedEncodingOverride(), element.getPreferredUserAgent());
        }

        return new TestResultImpl(connectionSuccess, RESULT_STATUS_OK, new HashMap<String, Boolean>());
    }

    /**
     * Determine whether every modifier in the given result map succeeded. A <code>null</code> value counts as a
     * failure.
     * 
     * @param modifierResults
     * @return boolean
     */
    private static boolean allSucceeded(final Map<String, Boolean> modifierResults) {
        for (Boolean modifierResult : modifierResults.values()) {
            if (!Boolean.TRUE.equals(modifierResult)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Produce a non-null status text for the given failure. Exceptions are not required to carry a message, in which
     * case the exception's string representation is used instead.
     * 
     * @param e
     * @return String
     */
    private static String describeFailure(final Exception e) {
        String message = e.getMessage();

        return message != null ? message : e.toString();
    }

    /**
     * Convert the given structured (XML or HTML) input stream to a character reader.
     * 
     * When an encoding override is given it is used as-is; otherwise the encoding is detected by
     * {@link XmlHtmlReader}. If the reader cannot be created, the error is logged, the input stream is closed and
     * <code>null</code> is returned.
     * 
     * @param inputStream
     * @param contentType
     * @param encodingOverride
     * @return Reader, or <code>null</code> if the stream is missing or could not be converted
     */
    private Reader convertStructedContentInputStreamToReader(final InputStream inputStream,
            final String contentType, final String encodingOverride) {
        if (inputStream == null) {
            logger.error("Unable to convert the given input stream to a character reader - no input stream given");

            return null;
        }

        try {
            if (encodingOverride != null) {
                Reader overriddenReader = new InputStreamReader(inputStream, encodingOverride);

                logger.trace("Encoding has been specified as {}", encodingOverride);

                return overriddenReader;
            }

            XmlHtmlReader detectingReader = new XmlHtmlReader(inputStream, contentType, true);

            if (logger.isTraceEnabled()) {
                logger.trace("Encoding has been detected as " + detectingReader.getEncoding());
            }

            return detectingReader;
        } catch (IOException e) {
            logger.error("Unable to convert the given input stream to a character reader", e);

            // No reader took ownership of the stream, so release it here
            try {
                inputStream.close();
            } catch (IOException closeException) {
                logger.debug("Unable to close the given input stream", closeException);
            }

            return null;
        }
    }
}