pagelyzer.Capture.java Source code

Java tutorial

Introduction

Here is the source code for pagelyzer.Capture.java

Source

/*
# Andrés Sanoja
# UPMC - LIP6
# pagelyzer 
#
# Copyright (C) 2011, 2012, 2013, 2014 Andrés Sanoja, Université Pierre et Marie Curie -
# Laboratoire d'informatique de Paris 6 (LIP6)
#
# Authors
# Andrés Sanoja andres.sanoja@lip6.fr
# Alexis Lechervy alexis.lechervy@lip6.fr
# Zeynep Pehlivan zeynep.pehlivan@lip6.fr
# Myriam Ben Saad myriam.ben-saad@lip6.fr
# Marc Law marc.law@lip6.fr
# Carlos Sureda carlos.sureda@lip6.fr
# Jordi Creus jordi.creus@lip6.fr
# LIP6 / Université Pierre et Marie Curie
    
# Responsables WP
# Matthieu CORD/UPMC
# Stéphane GANÇARSKI/UPMC
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Some parts of this package are adapted from the BrowserShot project developed by IM, France.
# https://github.com/sbarton/browser-shot-tool-mapred
 */

package pagelyzer;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.configuration.XMLConfiguration;
import org.openqa.selenium.By;
import org.openqa.selenium.Keys;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.Platform;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriverException;
import org.openqa.selenium.interactions.Actions;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.support.ui.*;

/**
 * Encapsulates a Selenium web driver (local or remote) and uses it to capture
 * a web page: title, HTML source, optional screenshot and optional
 * segmentation result.
 *
 * @author sanojaa
 */
public class Capture {
    /** Maximum number of attempts made by {@link #initWebDriver()}. */
    private static final int ATTEMPT_MAX = 10;

    /** Pause, in seconds, between driver initialization attempts. */
    private static final long RETRY_DELAY_SECONDS = 10;

    public BrowserRep browser = new BrowserRep();
    public CaptureResult result = new CaptureResult();
    private final XMLConfiguration config;

    /**
     * Constructor
     * @param config2 the current configuration
     */
    public Capture(XMLConfiguration config2) {
        this.config = config2;
    }

    /**
     * Creates the web driver (local or remote, depending on the
     * {@code selenium.run.mode} setting) and resizes the browser window to the
     * configured client size. Failed attempts are retried up to
     * {@value #ATTEMPT_MAX} times, pausing between attempts after an I/O or
     * web driver error.
     * This function is an adaptation of the initWebDriver() method from the
     * BrowserShot_mapred project.
     *
     * @throws RuntimeException if the Selenium server URL is malformed
     */
    public void initWebDriver() {
        boolean done = false;
        int attemptNo = 0;
        while (!done && attemptNo < ATTEMPT_MAX) { // to limit attempts to 10 ZP
            attemptNo++;
            try {
                System.out.println("Attempt = " + attemptNo);
                if (config.getString("selenium.run.mode").equals(JPagelyzer.LOCAL)) {
                    this.browser.setLocalDriver();
                } else {
                    this.browser.setRemoteDriver(config.getString("selenium.server.url"));
                }
                this.browser.resize(config.getInt("selenium.client.width"),
                        config.getInt("selenium.client.height"));
                done = true;
            } catch (MalformedURLException e) {
                // Retrying cannot fix a bad URL, so fail immediately.
                throw new RuntimeException("Invalid Selenium driver URL", e);
            } catch (IOException | WebDriverException e) {
                System.out.println("Attempt failed sleeping for 10s." + verboseDetail(e));
                if (!pauseBeforeRetry()) {
                    // The thread was interrupted: stop retrying instead of spinning
                    // through the remaining attempts.
                    break;
                }
            } catch (Exception e) {
                System.out.println("Some proble arrived :(" + verboseDetail(e));
            }
        }
        if (!done) {
            // Make the failure visible instead of returning silently.
            System.out.println("Could not initialize the web driver after " + attemptNo + " attempt(s)");
        }
    }

    /**
     * Prepare a new instance of webdriver for browser.
     * This function is an adaptation of the setup() method from the
     * BrowserShot_mapred project.
     *
     * @param browser the current browser to setup; one of {@code "firefox"},
     *        {@code "opera"} or {@code "chrome"}
     * @throws RuntimeException if the browser name is not recognized
     */
    public void setup(String browser) {
        System.out.println("Setting up browser: " + browser);
        DesiredCapabilities capability;
        // Constant-first comparisons so that a null name is reported as
        // "not recognized" rather than failing with a NullPointerException.
        if ("firefox".equals(browser)) {
            capability = DesiredCapabilities.firefox();
        } else if ("opera".equals(browser)) {
            capability = DesiredCapabilities.opera();
        } else if ("chrome".equals(browser)) {
            capability = DesiredCapabilities.chrome();
            capability.setCapability("chrome.switches", Arrays.asList("--disable-logging"));
        } else {
            throw new RuntimeException("Browser " + browser + " not recognized.");
        }

        capability.setPlatform(Platform.LINUX);
        this.browser.desc = browser;
        this.browser.capabilities = capability;
        initWebDriver();
    }

    /**
     * Shut down the current webdriver instance.
     * This function is an adaptation of the cleanup() method from the
     * BrowserShot_mapred project.
     *
     * @throws IOException kept for compatibility with existing callers
     * @throws InterruptedException kept for compatibility with existing callers
     */
    public void cleanup() throws IOException, InterruptedException {
        if (this.browser.driver != null) {
            // quit() ends the whole driver session; close() only closes the
            // current window and can leave the session/driver process behind.
            this.browser.driver.quit();
        }
    }

    /**
     * Execute a capture from the current browser instance.
     * Loads the page, stores its title and HTML source in {@link #result},
     * optionally takes a screenshot and optionally runs the segmentation.
     * This function is an adaptation of the getScreenShotWithTimeout() method
     * from the BrowserShot_mapred project.
     *
     * @param url the web page to capture
     * @param screenshot indicates if a page screenshot should be taken
     * @param segmentation indicates if the segmentation should be done
     * @return the result of the capture, or {@code null} if the capture failed
     */
    public CaptureResult process(String url, boolean screenshot, boolean segmentation) {
        System.out.println("getting data using driver: " + this.browser.desc);

        // run() stores null in `result` after a failed capture; without this
        // guard every later capture would fail on the first field assignment.
        if (this.result == null) {
            this.result = new CaptureResult();
        }

        boolean local = true;
        String serverlink = "";
        if (segmentation) {
            String remoteUrl = config.getString("pagelyzer.run.internal.server.remote.url");
            local = (remoteUrl == null);
            if (local) {
                serverlink = "http://" + config.getString("pagelyzer.run.internal.server.local.ip") + ":"
                        + config.getString("pagelyzer.run.internal.server.local.port");
            } else {
                serverlink = remoteUrl;
            }
        }

        try {
            this.browser.driver.get(url);
            result.title = this.browser.driver.getTitle();
            result.srcHTML = this.browser.driver.getPageSource();

            System.out.println("title: " + result.title);
            if (screenshot) {
                result.image = ((TakesScreenshot) this.browser.driver).getScreenshotAs(OutputType.BYTES);
            }
            if (segmentation) {
                segment(serverlink, local);
            }
        } catch (WebDriverException e) {
            System.out.println("ERROR: Could not load " + url);
            if (config.getString("selenium.run.mode").equals(JPagelyzer.REMOTE)) {
                System.out.println("Can not connect to server " + config.getString("selenium.server.url"));
            }
            System.out.println("Trying to reinitialize browser");
            System.out.println(e);
            try {
                // End the broken session completely before a new driver is created.
                this.browser.driver.quit();
            } catch (WebDriverException ex) {
                System.out.println("ERROR: cannot close browser ");
            }

            pauseBeforeRetry();
            initWebDriver();
            return null;

        } catch (Throwable e) {
            if (config.getString("selenium.run.mode").equals(JPagelyzer.REMOTE)) {
                System.out.println("Can not connect to server " + config.getString("selenium.server.url"));
            }
            System.out.println("ERROR: Could not load " + url);
            System.out.println(e);
            return null;
        }

        return result;
    }

    /**
     * Invoke the capture process and store its outcome in {@link #result}
     * ({@code null} when the capture failed).
     *
     * @param url the web page to capture
     * @param screenshot indicates if the screenshot should be taken
     * @param segmentation indicates if the segmentation should be done
     */
    public void run(String url, boolean screenshot, boolean segmentation) {
        this.result = process(url, screenshot, segmentation);
    }

    /**
     * Injects the segmentation scripts into the loaded page, runs the
     * segmentation and stores its output in {@link #result}. When the scripts
     * are served locally, an internal server is started for the duration of
     * the call and always stopped afterwards.
     *
     * @param serverlink base URL the scripts are loaded from
     * @param local whether the internal server must be started locally
     * @throws Exception if the server cannot be started or the scripts fail
     */
    private void segment(String serverlink, boolean local) throws Exception {
        ServerLyzer server = null;
        try {
            if (local) {
                server = new ServerLyzer(config);
                int port = config.getInt("pagelyzer.run.internal.server.local.port");
                server.start(port, config.getString("pagelyzer.run.internal.server.local.wwwroot"));
            }

            String srcJS = serverlink + "/bomlib.js";
            String jqueryJS = serverlink + "/jquery-min.js";
            String polyKJS = serverlink + "/polyk.js";
            String cryptoJS = serverlink + "/md5.js";

            this.browser.js.executeScript(
                    "var s=window.document.createElement('script');s.setAttribute('id','bominject');s.setAttribute('src','"
                            + srcJS + "');window.document.head.appendChild(s)");
            this.browser.js.executeScript(
                    "var j=window.document.createElement('script');j.setAttribute('id','bomjquery');j.setAttribute('src','"
                            + jqueryJS + "');window.document.head.appendChild(j)");
            this.browser.js.executeScript(
                    "var k=window.document.createElement('script');k.setAttribute('id','bompolyk');k.setAttribute('src','"
                            + polyKJS + "');window.document.head.appendChild(k)");
            // NOTE(review): this element reuses the id 'bompolyk' of the previous
            // script; it looks like a copy/paste slip, but confirm that nothing in
            // bomlib.js depends on the id before renaming it.
            this.browser.js.executeScript(
                    "var q=window.document.createElement('script');q.setAttribute('id','bompolyk');q.setAttribute('src','"
                            + cryptoJS + "');window.document.head.appendChild(q)");

            // NOTE(review): this only waits for the <script> element to exist in
            // the DOM, not for the script to be loaded -- confirm that is enough.
            WebDriverWait wait = new WebDriverWait(this.browser.driver, 120);
            wait.until(ExpectedConditions.presenceOfElementLocated(By.id("bominject")));

            String bomversion = (String) this.browser.js.executeScript("return bomversion");

            String granularity = config.getString("bom.granularity");
            String returns = config.getString("bom.returns");
            System.out.println("Using BoM algorithm v" + bomversion + " pAC=" + granularity);
            String auxr = (String) this.browser.js.executeScript("return startSegmentation(window,"
                    + granularity + "," + config.getString("bom.separation") + ",'" + returns + "')");

            // Locale.ROOT keeps the case folding independent of the JVM default
            // locale (e.g. the Turkish dotless i would break "VIXML"/"WPRIMA").
            switch (returns.toLowerCase(Locale.ROOT)) {
            case "wprima":
                result.wprima = auxr;
                break;
            case "record":
                result.record = auxr;
                break;
            case "vixml":
            default:
                result.viXML = auxr;
                break;
            }
        } finally {
            if (server != null) {
                server.stop();
            }
        }
    }

    /**
     * Returns the exception description when verbose mode is enabled in the
     * configuration, or an empty string otherwise.
     */
    private String verboseDetail(Exception e) {
        return config.getBoolean("pagelyzer.run.verbose") ? e.toString() : "";
    }

    /**
     * Sleeps for the retry delay.
     *
     * @return {@code true} if the pause completed, {@code false} if the thread
     *         was interrupted (the interrupt flag is restored in that case)
     */
    private boolean pauseBeforeRetry() {
        try {
            Thread.sleep(TimeUnit.SECONDS.toMillis(RETRY_DELAY_SECONDS));
            return true;
        } catch (InterruptedException ex) {
            Logger.getLogger(Capture.class.getName()).log(Level.SEVERE, null, ex);
            // Restore the flag so callers further up can react to the interrupt.
            Thread.currentThread().interrupt();
            return false;
        }
    }
}