com.digitalpebble.stormcrawler.protocol.selenium.RemoteDriverProtocol.java Source code

Java tutorial

Introduction

Here is the source code for com.digitalpebble.stormcrawler.protocol.selenium.RemoteDriverProtocol.java

Source

/**
 * Licensed to DigitalPebble Ltd under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * DigitalPebble licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.digitalpebble.stormcrawler.protocol.selenium;

import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;

import org.apache.storm.Config;
import org.openqa.selenium.WebDriver.Timeouts;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.remote.RemoteWebDriver;

import com.digitalpebble.stormcrawler.util.ConfUtils;

/**
 * Delegates the requests to one or more remote selenium servers. The processes
 * must be started / stopped separately. The URLs to connect to are specified
 * with the config 'selenium.addresses'.
 * <p>
 * Additional driver capabilities can be supplied as a map under the config key
 * 'selenium.capabilities'. A capability whose value is the string
 * '$useragent' (case-insensitive) is replaced by the agent string computed
 * from the configuration.
 */
public class RemoteDriverProtocol extends SeleniumProtocol {

    /**
     * Configures the protocol and opens one {@link RemoteWebDriver} session per
     * address listed under 'selenium.addresses'.
     * <p>
     * Each driver gets the timeouts defined by 'selenium.implicitlyWait'
     * (default 0), 'selenium.pageLoadTimeout' (default -1) and
     * 'selenium.setScriptTimeout' (default 0), all expressed in milliseconds,
     * and is then added to the inherited {@code drivers} collection.
     *
     * @param conf
     *            the topology configuration
     * @throws RuntimeException
     *             if 'selenium.addresses' is empty, or wrapping any exception
     *             raised while connecting to a server or applying the timeouts
     */
    @Override
    public void configure(Config conf) {
        super.configure(conf);

        // see https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities
        DesiredCapabilities capabilities = new DesiredCapabilities();
        capabilities.setJavascriptEnabled(true);

        String userAgentString = getAgentString(conf);

        // custom capabilities; the configuration is untyped, hence the cast
        @SuppressWarnings("unchecked")
        Map<String, Object> confCapabilities = (Map<String, Object>) conf.get("selenium.capabilities");
        if (confCapabilities != null) {
            for (Entry<String, Object> entry : confCapabilities.entrySet()) {
                Object val = entry.getValue();
                // substitute variable $useragent for the real value
                if (val instanceof String && "$useragent".equalsIgnoreCase(val.toString())) {
                    val = userAgentString;
                }
                // pass the (possibly substituted) value, not the raw entry
                // value, otherwise the placeholder would be sent verbatim
                capabilities.setCapability(entry.getKey(), val);
            }
        }

        // load addresses from config
        List<String> addresses = ConfUtils.loadListFromConf("selenium.addresses", conf);
        if (addresses.isEmpty()) {
            throw new RuntimeException("No value found for selenium.addresses");
        }
        try {
            // the timeout settings are identical for every driver: read once
            int implicitWait = ConfUtils.getInt(conf, "selenium.implicitlyWait", 0);
            int pageLoadTimeout = ConfUtils.getInt(conf, "selenium.pageLoadTimeout", -1);
            int setScriptTimeout = ConfUtils.getInt(conf, "selenium.setScriptTimeout", 0);

            for (String cdaddress : addresses) {
                RemoteWebDriver driver = new RemoteWebDriver(new URL(cdaddress), capabilities);
                Timeouts touts = driver.manage().timeouts();
                touts.implicitlyWait(implicitWait, TimeUnit.MILLISECONDS);
                touts.pageLoadTimeout(pageLoadTimeout, TimeUnit.MILLISECONDS);
                touts.setScriptTimeout(setScriptTimeout, TimeUnit.MILLISECONDS);
                drivers.add(driver);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Command-line entry point: hands a new instance of this protocol and the
     * arguments to the two-argument {@code main} inherited from the parent
     * class hierarchy.
     *
     * @param args
     *            command-line arguments, passed through unchanged
     * @throws Exception
     *             if the delegated call fails
     */
    public static void main(String[] args) throws Exception {
        RemoteDriverProtocol.main(new RemoteDriverProtocol(), args);
    }

}