com.smash.revolance.ui.explorer.PageParser.java Source code

Java tutorial

Introduction

Here is the source code for com.smash.revolance.ui.explorer.PageParser.java

Source

package com.smash.revolance.ui.explorer;

/*
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * Revolance-UI-Explorer
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * Copyright (C) 2012 - 2013 RevoLance
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the 
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public 
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

import com.smash.revolance.ui.model.bot.Bot;
import com.smash.revolance.ui.model.element.api.Button;
import com.smash.revolance.ui.model.element.api.Data;
import com.smash.revolance.ui.model.element.api.Element;
import com.smash.revolance.ui.model.element.api.Link;
import com.smash.revolance.ui.model.helper.BotHelper;
import com.smash.revolance.ui.model.helper.ImageHelper;
import com.smash.revolance.ui.model.page.api.Page;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.WebElement;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Parses a {@link Page}: measures the rendered document, optionally captures
 * screenshots, and collects the displayed elements the page contains.
 * <p>
 * Progress and timings are reported through the logger of the user that owns
 * the page.
 * <p>
 * User: wsmash
 * Date: 02/06/13
 * Time: 12:35
 */
public class PageParser {

    /** Script returning the largest of the document's height measurements. */
    private static final String DOCUMENT_HEIGHT_SCRIPT = "var D = document; return Math.max("
            + "D.body.scrollHeight, D.documentElement.scrollHeight,"
            + "D.body.offsetHeight, D.documentElement.offsetHeight,"
            + "D.body.clientHeight, D.documentElement.clientHeight);";

    /** Script returning the largest of the document's width measurements. */
    private static final String DOCUMENT_WIDTH_SCRIPT = "var D = document; return Math.max("
            + "D.body.scrollWidth, D.documentElement.scrollWidth,"
            + "D.body.offsetWidth, D.documentElement.offsetWidth,"
            + "D.body.clientWidth, D.documentElement.clientWidth);";

    /** Share of the page area handed to the element filter as its second argument. */
    private static final double FILTER_PAGE_AREA_SHARE = 0.95;

    /**
     * Ratio handed to the element filter as its third argument.
     * Deliberately written with a floating-point operand: the integer
     * expression {@code 1 / 8} evaluates to {@code 0}.
     * NOTE(review): assumes the filter parameter accepts a double - confirm
     * against Element.filterElementsIncludedInEachOthers.
     */
    private static final double FILTER_RATIO = 1.0 / 8;

    /** The page being parsed; every result is written back to it. */
    private final Page page;

    /**
     * Creates a parser bound to the given page.
     *
     * @param page the page to parse
     */
    public PageParser(Page page) {
        this.page = page;
    }

    /**
     * Logs a message at INFO level on the logger of the page's user.
     *
     * @param message the text to log
     */
    private void info(String message) {
        page.getUser().getLogger().log(Level.INFO, message);
    }

    /**
     * Computes the whole number of seconds elapsed since a timestamp.
     *
     * @param mark a value previously read from {@link System#currentTimeMillis()}
     * @return elapsed time in seconds, truncated
     */
    private static long secondsSince(long mark) {
        return (System.currentTimeMillis() - mark) / 1000;
    }

    /**
     * Parses the page content, logs the clickable elements found and the
     * time the whole operation took.
     *
     * @return the elements stored on the page
     * @throws Exception if retrieving, filtering or capturing the elements fails
     */
    private List<Element> parseContent() throws Exception {
        info("Parsing elements");
        long mark = System.currentTimeMillis();

        List<Element> content = _parseContent();

        logClickableContent(content);

        long duration = secondsSince(mark);
        info("Parsing elements [Done] [Duration: " + duration + " sec]");

        return content;
    }

    /**
     * Logs the content of every {@link Link} and {@link Button} of the list,
     * or a single notice when the list holds none.
     *
     * @param content the elements to inspect
     */
    private void logClickableContent(List<Element> content) {
        List<Element> clickables = new ArrayList<Element>();
        for (Element element : content) {
            if (element instanceof Link || element instanceof Button) {
                clickables.add(element);
            }
        }

        // Decide on the clickable elements, not on the raw list: a page made
        // only of non-clickable elements must not announce clickable content.
        if (clickables.isEmpty()) {
            info("No clickable content has been found.");
            return;
        }

        info("Clickable content found: ");
        for (Element element : clickables) {
            info("--|  " + element.getContent());
        }
    }

    /**
     * Builds the element list of the page and stores it on the page.
     * External or broken pages get an empty list.
     *
     * @return the list that was stored on the page
     * @throws Exception if retrieving, filtering or capturing the elements fails
     */
    private List<Element> _parseContent() throws Exception {
        List<Element> content = new ArrayList<Element>();
        if (!page.isExternal() && !page.isBroken()) {
            // retrieve all the elements of the html body
            content = getElements();
            // filter the elements included in each other (optimization)
            _filterElementsIncludedInEachOther(content);
            // takes screenshot of all the content
            takeScreenshots(content);
            // only for convenience to be able to track the click sequence
            Collections.sort(content);
        }

        page.setContent(content);
        return content;
    }

    /**
     * Delegates to {@code Element.filterElementsIncludedInEachOthers} and logs
     * the duration and the number of elements left in the list.
     *
     * @param content the element list, filtered in place by the delegate
     * @throws Exception if the filter fails
     */
    private void _filterElementsIncludedInEachOther(List<Element> content) throws Exception {
        long mark = System.currentTimeMillis();
        info("Filtering page elements");
        Element.filterElementsIncludedInEachOthers(content, page.getArea() * FILTER_PAGE_AREA_SHARE, FILTER_RATIO);

        long duration = secondsSince(mark);
        info("Filtering page elements [Done] [Duration: " + duration + "sec]");
        info("Found: " + content.size() + " pertinent elements");
    }

    /**
     * Captures a screenshot of the page when the page has no caption yet and
     * page screenshots are enabled for the user.
     *
     * @return the caption of the page after the optional capture
     * @throws Exception if the capture or the image decoding fails
     */
    public String takeScreenShot() throws Exception {
        // Fetched up front, as before, so the call order on the user is unchanged.
        Bot bot = page.getUser().getBot();
        if (page.getCaption().isEmpty() && page.getUser().isPageScreenshotEnabled()) {
            info("Taking page snapshot: '" + page.getTitle() + "'");
            long mark = System.currentTimeMillis();

            String img = BotHelper.takeScreenshot(bot);
            if (img != null) {
                // Store the decoded image; presumably this also refreshes the
                // caption returned below - verify against Page.setImage.
                page.setImage(ImageHelper.decodeToImage(img));
                page.setScreenshotTaken(true);
            }

            long duration = secondsSince(mark);
            info("Taking page snapshot: '" + page.getTitle() + "' [Done] [Duration: " + duration + " sec]");
        }
        return page.getCaption();
    }

    /**
     * Captures a screenshot of every element of the list, provided both page
     * and page-element screenshots are enabled for the user.
     *
     * @param content the elements to capture
     * @throws Exception if capturing an element fails
     */
    private void takeScreenshots(List<Element> content) throws Exception {
        if (!page.getUser().isPageScreenshotEnabled() || !page.getUser().isPageElementScreenshotEnabled()) {
            return;
        }

        long mark = System.currentTimeMillis();

        int contentIdx = 0;
        for (Element pageElement : content) {
            contentIdx++;
            info(String.format("Taking element screenshots ( %d / %d )", contentIdx, content.size()));
            pageElement.takeScreenShot();
        }

        long duration = secondsSince(mark);
        info("Taking elements screenshots [Done] [Duration: " + duration + "sec]");
    }

    /**
     * Runs a measuring script in the browser and converts its result.
     * Any {@link Number} is accepted, so a script result delivered as a
     * {@code Double} no longer fails with a {@code ClassCastException}.
     *
     * @param bot    the bot used to run the script
     * @param script the JavaScript to evaluate
     * @return the result as an integer, or {@code null} when the script
     *         returned nothing or something that is not a number
     * @throws Exception if the script execution fails
     */
    private static Integer runDimensionScript(Bot bot, String script) throws Exception {
        Object result = bot.runJS(script);
        if (result instanceof Number) {
            return Integer.valueOf(((Number) result).intValue());
        }
        return null;
    }

    /**
     * Measures the document height.
     *
     * @param bot the bot used to run the measuring script
     * @return the measured height, or the user's browser height when the
     *         script yields no number
     * @throws Exception if the script execution fails
     */
    private int getHeight(Bot bot) throws Exception {
        //Object o = bot.runJS( "return Math.max(document.body.clientHeight, window.innerHeight)" );
        Integer measured = runDimensionScript(bot, DOCUMENT_HEIGHT_SCRIPT);
        if (measured == null) {
            return page.getUser().getBrowserHeight();
        }
        return measured.intValue();
    }

    /**
     * Measures the document width.
     *
     * @param bot the bot used to run the measuring script
     * @return the measured width, or the user's browser width when the
     *         script yields no number
     * @throws Exception if the script execution fails
     */
    private int getWidth(Bot bot) throws Exception {
        //Object o = bot.runJS( "return Math.max(document.body.clientWidth, window.innerWidth)" );
        Integer measured = runDimensionScript(bot, DOCUMENT_WIDTH_SCRIPT);
        if (measured == null) {
            return page.getUser().getBrowserWidth();
        }
        return measured.intValue();
    }

    /**
     * Parses the page unless it has already been parsed or is external.
     * <p>
     * The user is moved to the page if needed, then the page is measured,
     * flagged as broken and/or unauthorized according to the application,
     * optionally captured, and finally its content is parsed when the page
     * is neither broken nor already filled. External pages only produce a
     * warning.
     *
     * @throws Exception if navigation, measuring, capturing or content parsing fails
     */
    public void parse() throws Exception {
        Logger logger = page.getUser().getLogger();

        if (!page.hasBeenParsed() && !page.isExternal()) {

            if (page.getUser().getCurrentPage() != page) {
                page.getUser().goTo(page).awaitLoaded();
            }

            Bot bot = page.getUser().getBot();
            page.setWidth(getWidth(bot));
            page.setHeight(getHeight(bot));

            if (page.getApplication().isPageBroken(page)) {
                page.setBroken(true);
            }

            if (!page.getApplication().isAuthorized(page)) {
                page.setAuthorized(false);
            }

            if (page.getUser().isPageScreenshotEnabled() && page.getCaption().isEmpty()) {
                takeScreenShot();
            }

            if (page.isBroken()) {
                logger.log(Level.WARN, "Page with url: '" + page.getUrl() + "' is broken.");
            } else if (page.isEmpty()) {
                // A healthy page that is not empty is left as is; it used to
                // be reported as broken by mistake.
                parseContent();
            }

            page.setParsed(true);
        } else if (page.isExternal()) {
            logger.log(Level.WARN, "Page with url: '" + page.getUrl() + "' is out of the domain: '"
                    + page.getUser().getDomain() + "'.");
        }
    }

    /**
     * Retrieves the raw web elements of the page and wraps each displayed
     * one that has a known implementation and a positive area.
     * <p>
     * A {@link StaleElementReferenceException} raised while inspecting an
     * element is logged and that element is skipped.
     *
     * @return the wrapped elements, in retrieval order
     * @throws Exception if the raw elements cannot be fetched or an element
     *                   cannot be instantiated
     */
    public List<Element> getElements() throws Exception {
        final Logger logger = page.getUser().getLogger();

        long mark = System.currentTimeMillis();

        List<Element> elements = new ArrayList<Element>();
        List<WebElement> webElements = BotHelper.getRawElements(page.getUser().getBot(), page);
        int idx = 0;
        int elementCount = webElements.size();
        for (WebElement webElement : webElements) {
            idx++;
            try {
                Element wrapped = toElement(webElement);
                if (wrapped != null) {
                    // handleAddition( elements, wrapped );
                    elements.add(wrapped);
                }
            } catch (StaleElementReferenceException e) {
                logger.log(Level.ERROR, e);
            } finally {
                // Progress is reported for every raw element, kept or not.
                logger.log(Level.INFO, "Retrieving page element ( " + idx + "/" + elementCount + " )");
            }
        }

        long duration = secondsSince(mark);
        logger.log(Level.INFO, "Retrieving page elements [Done] [Duration: " + duration + "sec]");
        return elements;
    }

    /**
     * Wraps a raw web element into its {@link Element} implementation.
     *
     * @param webElement the raw element to wrap
     * @return the wrapped element, or {@code null} when the element is not
     *         displayed, has no implementation or has no positive area
     * @throws Exception if the implementation cannot be instantiated
     */
    private Element toElement(WebElement webElement) throws Exception {
        if (!webElement.isDisplayed()) {
            return null;
        }

        Class<? extends Element> implementation = Element.getImplementation(webElement);
        if (implementation == null) {
            return null;
        }

        // Implementations are built reflectively through their (Page, WebElement) constructor.
        Element wrapped = implementation.getConstructor(Page.class, WebElement.class).newInstance(page, webElement);
        if (wrapped.getArea() > 0) {
            return wrapped;
        }
        return null;
    }

    /**
     * Adds an element to a list while resolving its overlap with the first
     * already-present element it overlaps, as reported by
     * {@code Element.isIncluded} in either direction.
     * <ul>
     * <li>A {@link Data} element with empty content is never added.</li>
     * <li>Data against Data: the new element replaces the existing one when
     * its area is smaller, otherwise it is dropped.</li>
     * <li>Data against a {@link Link} or {@link Button}: the new element is dropped.</li>
     * <li>A new Link or Button replaces the element it overlaps.</li>
     * <li>Any other kind of element is added without any check.</li>
     * </ul>
     *
     * @param elements the list to update in place
     * @param elem     the candidate element
     */
    public static void handleAddition(List<Element> elements, Element elem) {
        boolean isToAdd = true;
        List<Element> toBeRemoved = new ArrayList<Element>();

        if (elem instanceof Data && elem.getContent().isEmpty()) {
            return;
        } else if (elem instanceof Button || elem instanceof Link || elem instanceof Data) {

            for (Element element : elements) {

                if (element.isIncluded(elem) || elem.isIncluded(element)) {
                    if (elem instanceof Data && element instanceof Data) {
                        if (elem.getArea() < element.getArea()) {
                            toBeRemoved.add(element);
                        } else {
                            isToAdd = false;
                        }
                    } else if (elem instanceof Data && (element instanceof Link || element instanceof Button)) {
                        isToAdd = false;
                    } else if (elem instanceof Button || elem instanceof Link) {
                        toBeRemoved.add(element);
                    }
                    // Optimisation since we're calling this at each addition & we add one element at a time
                    // there is no need to run the complete loop
                    break;
                }

            }
        }

        elements.removeAll(toBeRemoved);
        if (isToAdd) {
            elements.add(elem);
        }
    }

}