org.rivalry.core.datacollector.DefaultDataCollector.java Source code

Java tutorial

Introduction

Here is the source code for org.rivalry.core.datacollector.DefaultDataCollector.java

Source

//*****************************************************************************
// Rivalry (http://code.google.com/p/rivalry)
// Copyright (c) 2011 Rivalry.org
// Admin rivalry@jeffreythompson.net
//
// See the file "LICENSE.txt" for information on usage and redistribution of
// this file, and for a DISCLAIMER OF ALL WARRANTIES.
//*****************************************************************************
package org.rivalry.core.datacollector;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;

import org.apache.commons.lang.StringUtils;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.rivalry.core.comparator.CandidateValueComparator;
import org.rivalry.core.model.Candidate;
import org.rivalry.core.model.Category;
import org.rivalry.core.model.Criterion;
import org.rivalry.core.model.RivalryData;
import org.rivalry.core.util.Provider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Provides a default implementation of a data collector.
 */
public class DefaultDataCollector implements DataCollector {
    /** Logger. */
    static final Logger LOGGER = LoggerFactory.getLogger(DefaultDataCollector.class);

    /** Candidate provider. */
    private final Provider<Candidate> _candidateProvider;

    /** Category provider. */
    private final Provider<Category> _categoryProvider;

    /** Criterion provider. */
    private final Provider<Criterion> _criterionProvider;

    /** Data post processor. */
    private final DataPostProcessor _dataPostProcessor;

    /** Flag indicating whether to create an average candidate. */
    private final boolean _isAverageCandidateCreated;

    /** Flag indicating whether to enable Javascript. */
    private final boolean _isJavascriptEnabled;

    /** Flag indicating whether to create a median candidate. */
    private final boolean _isMedianCandidateCreated;

    /** Name string parser. */
    private final NameStringParser _nameStringParser;

    /** Value string parser. */
    private final ValueStringParser<?> _valueStringParser;

    /** Maximum number of threads. */
    private final Integer _maxThreads;

    /**
     * Construct this object with the given parameter.
     * 
     * @param isJavascriptEnabled Flag indicating whether to enable Javascript.
     * @param maxThreads Maximum number of threads.
     * @param nameStringParser Name string parser.
     * @param valueStringParser Value string parser.
     * @param candidateProvider Candidate provider.
     * @param categoryProvider Category provider.
     * @param criterionProvider Criterion provider.
     * @param dataPostProcessor Data post processor.
     * @param isAverageCandidateCreated Flag indicating whether to create an
     *            average candidate.
     * @param isMedianCandidateCreated Flag indicating whether to create a
     *            median candidate.
     */
    public DefaultDataCollector(final boolean isJavascriptEnabled, final Integer maxThreads,
            final NameStringParser nameStringParser, final ValueStringParser<?> valueStringParser,
            final Provider<Candidate> candidateProvider, final Provider<Category> categoryProvider,
            final Provider<Criterion> criterionProvider, final DataPostProcessor dataPostProcessor,
            final boolean isAverageCandidateCreated, final boolean isMedianCandidateCreated) {
        _isJavascriptEnabled = isJavascriptEnabled;
        _maxThreads = maxThreads;
        _nameStringParser = nameStringParser;
        _valueStringParser = valueStringParser;
        _candidateProvider = candidateProvider;
        _categoryProvider = categoryProvider;
        _criterionProvider = criterionProvider;
        _dataPostProcessor = dataPostProcessor;
        _isAverageCandidateCreated = isAverageCandidateCreated;
        _isMedianCandidateCreated = isMedianCandidateCreated;
    }

    @Override
    public void fetchData(final DCSpec dcSpec, final String username, final String password,
            final RivalryData rivalryData) {
        final long start = System.currentTimeMillis();

        if (_maxThreads == 1) {
            final WebDriver webDriver = createWebDriver();

            try {
                for (final Candidate candidate : rivalryData.getCandidates()) {
                    fetchData(webDriver, dcSpec, rivalryData, candidate);
                }
            } catch (final Exception e) {
                e.printStackTrace();
            }
        } else {
            final ExecutorService executorService = Executors.newFixedThreadPool(_maxThreads);

            for (final Candidate candidate : rivalryData.getCandidates()) {
                final Runnable task = new Runnable() {
                    @Override
                    public void run() {
                        final WebDriver webDriver = createWebDriver();
                        fetchData(webDriver, dcSpec, rivalryData, candidate);
                    }
                };

                System.out.println(candidate.getName() + " submit");
                executorService.submit(task);
            }

            executorService.shutdown();

            try {
                executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
            } catch (final InterruptedException e) {
                e.printStackTrace();
            }
        }

        _dataPostProcessor.postProcess(rivalryData);

        final Candidate averageCandidate = createAverageCandidate(rivalryData);
        final Candidate medianCandidate = createMedianCandidate(rivalryData);

        if (averageCandidate != null) {
            rivalryData.getCandidates().add(averageCandidate);
        }

        if (medianCandidate != null) {
            rivalryData.getCandidates().add(medianCandidate);
        }

        final long end = System.currentTimeMillis();
        logTiming("0 fetchData()", start, end);
    }

    /**
     * @return a new web driver.
     */
    WebDriver createWebDriver() {
        final WebDriver answer = new HtmlUnitDriver();

        final java.util.logging.Logger logger = java.util.logging.Logger.getLogger("");
        logger.setLevel(Level.OFF);

        if (answer instanceof HtmlUnitDriver) {
            ((HtmlUnitDriver) answer).setJavascriptEnabled(_isJavascriptEnabled);
        }

        return answer;
    }

    /**
     * @param webDriver Web driver.
     * @param dcSpec Data collector specification.
     * @param rivalryData Rivalry data.
     * @param candidate Candidate.
     */
    void fetchData(final WebDriver webDriver, final DCSpec dcSpec, final RivalryData rivalryData,
            final Candidate candidate) {
        final long start = System.currentTimeMillis();

        // And now use this to visit a web page.
        final String url = candidate.getPage();
        if (StringUtils.isNotEmpty(url)) {
            System.out.println("Accessing URL " + url);
            webDriver.get(url);
        }

        for (final DCSelector selector : dcSpec.getSelectors()) {
            final SelectorType type = selector.getType();
            switch (type) {
            case CLICK:
                final DCSelector childSelector = selector.getSelectors().get(0);
                final SelectorType childType = childSelector.getType();
                final String childValue = childSelector.getValue();
                LOGGER.debug("childType = " + childType + " childValue = " + childValue);
                childType.findElement(webDriver, childValue).click();
                LOGGER.debug("2 Page title is: " + webDriver.getTitle());
                break;

            case SUBMIT:
                type.findElement(webDriver, selector.getValue()).submit();
                break;

            default:
                final List<WebElement> parents = type.findElements(webDriver, selector.getValue());
                for (final WebElement parent : parents) {
                    process(rivalryData, candidate, parent, selector.getSelectors());
                }
            }
        }

        final long end = System.currentTimeMillis();
        logTiming(candidate.getName() + " 2 fetchData()", start, end);
    }

    /**
     * @param rivalryData Rivalry data.
     * 
     * @return a new candidate with average ratings.
     */
    private Candidate createAverageCandidate(final RivalryData rivalryData) {
        Candidate answer = null;

        if (_isAverageCandidateCreated) {
            answer = getCandidateProvider().newInstance();
            answer.setName("*AVERAGE*");

            final int size = rivalryData.getCandidates().size();

            for (final Criterion criterion : rivalryData.getCriteria()) {
                Double sum = null;

                for (final Candidate candidate : rivalryData.getCandidates()) {
                    final Double rating = candidate.getRating(criterion);

                    if (rating != null) {
                        if (sum == null) {
                            sum = 0.0;
                        }
                        sum += rating;
                    }
                }

                if (sum != null) {
                    answer.putValue(criterion, sum / size);
                }
            }
        }

        return answer;
    }

    /**
     * @param name Name.
     * 
     * @return a new category.
     */
    private Category createCategory(final String name) {
        final Category answer = getCategoryProvider().newInstance();

        answer.setName(name);

        return answer;
    }

    /**
     * @param name Name.
     * @param category Category.
     * 
     * @return a new criterion.
     */
    private Criterion createCriterion(final String name, final Category category) {
        final Criterion answer = getCriterionProvider().newInstance();

        answer.setName(name);
        answer.setCategory(category);

        return answer;
    }

    /**
     * @param rivalryData Rivalry data.
     * 
     * @return a new candidate with median ratings.
     */
    private Candidate createMedianCandidate(final RivalryData rivalryData) {
        Candidate answer = null;

        if (_isMedianCandidateCreated) {
            answer = getCandidateProvider().newInstance();
            answer.setName("*MEDIAN*");

            final List<Candidate> candidates = new ArrayList<Candidate>(rivalryData.getCandidates());
            final int midpoint = candidates.size() / 2;

            for (final Criterion criterion : rivalryData.getCriteria()) {
                final Comparator<Candidate> comparator = new CandidateValueComparator(criterion);
                Collections.sort(candidates, comparator);
                final Candidate candidate = candidates.get(midpoint);
                final Object value = candidate.getValue(criterion);
                answer.putValue(criterion, value);
            }
        }

        return answer;
    }

    /**
     * @param parent Parent web element.
     * @param selector0 Data collection selector.
     * @param size Size.
     * 
     * @return a list of names.
     */
    private List<String> createNames(final WebElement parent, final DCSelector selector0, final int size) {
        final List<String> answer = new ArrayList<String>();

        if (selector0.getType() == SelectorType.LITERAL) {
            for (int i = 0; i < size; i++) {
                answer.add(selector0.getValue());
            }
        } else {
            LOGGER.debug("parent = " + parent);
            final List<WebElement> elements0 = selector0.getType().findElements(parent, selector0.getValue());
            final int size0 = elements0.size();
            LOGGER.debug("size0 = " + size0);

            for (int i = 0; i < size0; i++) {
                final String name = getNameStringParser().parse(elements0.get(i));
                LOGGER.debug(i + " name = [" + name + "]");
                if (StringUtils.isNotEmpty(name)) {
                    answer.add(name);
                }
            }
        }

        return answer;
    }

    /**
     * @return the candidateProvider
     */
    private Provider<Candidate> getCandidateProvider() {
        return _candidateProvider;
    }

    /**
     * @return the categoryProvider
     */
    private Provider<Category> getCategoryProvider() {
        return _categoryProvider;
    }

    /**
     * @return the criterionProvider
     */
    private Provider<Criterion> getCriterionProvider() {
        return _criterionProvider;
    }

    /**
     * @return the nameStringParser
     */
    private NameStringParser getNameStringParser() {
        return _nameStringParser;
    }

    /**
     * @return the valueStringParser
     */
    private ValueStringParser<?> getValueStringParser() {
        return _valueStringParser;
    }

    /**
     * Log the given timing parameters.
     * 
     * @param title Title.
     * @param start Start time.
     * @param end End time.
     */
    private void logTiming(final String title, final long start, final long end) {
        System.out.println(title + " \telapsed: " + (end - start) + " ms");
        LOGGER.info(title + " \telapsed: " + (end - start) + " ms");
    }

    /**
     * Process the given parameters to create new entries for rivalry data.
     * 
     * @param rivalryData Rivalry data.
     * @param candidate Candidate.
     * @param parent Parent web element.
     * @param selector Selector.
     */
    private void process(final RivalryData rivalryData, final Candidate candidate, final WebElement parent,
            final DCSelector selector) {
        LOGGER.debug("selector = " + selector.getType() + " " + selector.getValue());

        final WebElement child = selector.getType().findElement(parent, selector.getValue());

        if (selector.getSelectors().isEmpty()) {
            LOGGER.debug("parent.getText() = [" + parent.getText() + "]");
            LOGGER.debug("child.getText()  = [" + child.getText() + "]");
        } else {
            process(rivalryData, candidate, child, selector.getSelectors());
        }
    }

    /**
     * Process the given parameters to create new entries for rivalry data.
     * 
     * @param rivalryData Rivalry data.
     * @param candidate Candidate.
     * @param parent Parent web element.
     * @param selectors Selectors.
     */
    private void process(final RivalryData rivalryData, final Candidate candidate, final WebElement parent,
            final List<DCSelector> selectors) {
        if ((selectors.size() == 2 || selectors.size() == 3) && selectors.get(0).getSelectors().isEmpty()
                && selectors.get(1).getSelectors().isEmpty()) {
            // Parent selector has two or three leaf children.
            final DCSelector selector0 = selectors.get(0);
            final DCSelector selector1 = selectors.get(1);
            final List<WebElement> elements1 = selector1.getType().findElements(parent, selector1.getValue());

            final int size = elements1.size();
            final List<String> names = createNames(parent, selector0, size);
            LOGGER.debug("names = " + names);

            if (size > 0 && !rivalryData.getCandidates().contains(candidate)) {
                rivalryData.getCandidates().add(candidate);
            }

            Category category = null;

            if (selectors.size() == 3) {
                // Create a category.
                final DCSelector selector2 = selectors.get(2);
                String categoryName;
                if (selector2.getType() == SelectorType.LITERAL) {
                    categoryName = selector2.getValue();
                } else {
                    final List<WebElement> elements2 = selector2.getType().findElements(parent,
                            selector2.getValue());
                    final WebElement valueElement = elements2.get(0);
                    categoryName = valueElement.getText();
                }
                LOGGER.debug("categoryName = [" + categoryName + "]");
                category = rivalryData.findCategoryByName(categoryName);

                if (category == null) {
                    category = createCategory(categoryName);
                    rivalryData.getCategories().add(category);
                }
            }

            for (int i = 0; i < size; i++) {
                final String name = names.get(i);

                if (StringUtils.isNotEmpty(name)) {
                    final WebElement valueElement = elements1.get(i);

                    final String criterionName = name;
                    Criterion criterion = rivalryData.findCriterionByName(criterionName);
                    if (criterion == null) {
                        criterion = createCriterion(criterionName, category);
                        rivalryData.getCriteria().add(criterion);
                    }

                    final Object value = getValueStringParser().parse(valueElement);
                    LOGGER.debug("name = [" + name + "] value = [" + value + "]");
                    candidate.putValue(criterion, value);
                }
            }
        } else {
            for (final DCSelector selector : selectors) {
                process(rivalryData, candidate, parent, selector);
            }
        }
    }
}