streaming.crawler.WebDriverResource.java Source code

Java tutorial

Introduction

Here is the source code for streaming.crawler.WebDriverResource.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package streaming.crawler;

import org.openqa.selenium.Capabilities;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.remote.BrowserType;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.remote.RemoteWebDriver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;

import static org.openqa.selenium.remote.CapabilityType.SUPPORTS_JAVASCRIPT;
import static org.openqa.selenium.remote.CapabilityType.TAKES_SCREENSHOT;

/**
 * Created by allwefantasy on 17/8/2018.
 */
public class WebDriverResource {

    public static final DesiredCapabilities DEFAULT_CAPABILITIES;
    public static final ChromeOptions DEFAULT_CHROME_CAPABILITIES;

    static {
        // HtmlUnit
        DEFAULT_CAPABILITIES = new DesiredCapabilities();
        DEFAULT_CAPABILITIES.setCapability(SUPPORTS_JAVASCRIPT, true);
        DEFAULT_CAPABILITIES.setCapability(TAKES_SCREENSHOT, false);
        DEFAULT_CAPABILITIES.setCapability("downloadImages", false);
        DEFAULT_CAPABILITIES.setCapability("browserLanguage", "zh_CN");
        DEFAULT_CAPABILITIES.setCapability("resolution", "1920x1080");

        // see https://peter.sh/experiments/chromium-command-line-switches/
        DEFAULT_CHROME_CAPABILITIES = new ChromeOptions();
        DEFAULT_CHROME_CAPABILITIES.merge(DEFAULT_CAPABILITIES);
        DEFAULT_CHROME_CAPABILITIES.setHeadless(true);
        DEFAULT_CHROME_CAPABILITIES.addArguments("--window-size=1920,1080");
    }

    private final Map<Integer, ArrayBlockingQueue<WebDriver>> freeDrivers = new HashMap<>();

    public WebDriver poll() {

        try {
            ArrayBlockingQueue<WebDriver> queue = freeDrivers.get(0);
            if (queue == null) {
                queue = new ArrayBlockingQueue<>(4);
                freeDrivers.put(0, queue);
            }

            if (queue.isEmpty()) {
                allocateWebDriver(queue);
            }

            WebDriver driver = queue.poll(2 * 2, TimeUnit.SECONDS);

            return driver;
        } catch (InterruptedException e) {
            System.out.println("Failed to poll a WebDriver from pool, " + e.toString());
        }

        // TODO: throw exception
        return null;
    }

    public void put(WebDriver driver) {
        try {
            freeDrivers.get(0).put(driver);

        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    private void allocateWebDriver(ArrayBlockingQueue<WebDriver> queue) {
        if (freeDrivers.get(0).size() > 10) {
            return;
        }

        try {
            WebDriver driver = doCreateWebDriver(true, BrowserType.CHROME, null, 3l, 3l, 3l);
            if (driver != null) {
                queue.put(driver);
            }
        } catch (Throwable e) {
            e.printStackTrace();
            // throw new RuntimeException("Can not create WebDriver");
        }
    }

    private void close() {
        for (WebDriver webDriver : freeDrivers.get(0)) {
            try {
                webDriver.quit();
            } catch (Exception e) {

            }
        }
    }

    private WebDriver doCreateWebDriver(boolean headless, String browserType,
            Class<? extends WebDriver> defaultWebDriverClass, Long pageLoadTimeout, Long scriptTimeout,
            Long implicitlyWait

    ) throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException {

        DesiredCapabilities capabilities = new DesiredCapabilities(DEFAULT_CAPABILITIES);
        ChromeOptions chromeOptions = new ChromeOptions();
        chromeOptions.merge(DEFAULT_CHROME_CAPABILITIES);
        // Use headless mode by default, GUI mode can be used for debugging
        chromeOptions.setHeadless(headless);
        if (headless) {
            // Do not downloading images in headless mode
            chromeOptions.addArguments("--blink-settings=imagesEnabled=false");
        }

        // Reset proxy
        capabilities.setCapability(CapabilityType.PROXY, (Object) null);
        chromeOptions.setCapability(CapabilityType.PROXY, (Object) null);

        // Choose the WebDriver
        WebDriver driver;
        if (browserType == BrowserType.CHROME) {
            driver = new ChromeDriver(chromeOptions);
        } else {
            if (RemoteWebDriver.class.isAssignableFrom(defaultWebDriverClass)) {
                driver = defaultWebDriverClass.getConstructor(Capabilities.class).newInstance(capabilities);
            } else {
                driver = defaultWebDriverClass.getConstructor().newInstance();
            }
        }

        // Set timeouts
        WebDriver.Timeouts timeouts = driver.manage().timeouts();
        timeouts.pageLoadTimeout(pageLoadTimeout, TimeUnit.SECONDS);
        timeouts.setScriptTimeout(scriptTimeout, TimeUnit.SECONDS);
        timeouts.implicitlyWait(implicitlyWait, TimeUnit.SECONDS);

        // Set log level
        if (driver instanceof RemoteWebDriver) {
            RemoteWebDriver remoteWebDriver = (RemoteWebDriver) driver;

            final Logger webDriverLog = LoggerFactory.getLogger(WebDriver.class);
            Level level = Level.FINE;
            if (webDriverLog.isDebugEnabled()) {
                level = Level.FINER;
            } else if (webDriverLog.isTraceEnabled()) {
                level = Level.ALL;
            }

            remoteWebDriver.setLogLevel(level);
        }

        return driver;
    }

}