// Java tutorial
/*
 * Tanaguru - Automated webpage assessment
 * Copyright (C) 2008-2013 Open-S Company
 *
 * This file is part of Tanaguru.
 *
 * Tanaguru is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Contact us by mail: open-s AT open-s DOT com
 */
package org.opens.tanaguru.rules.elementselector;

import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.opens.tanaguru.processor.SSPHandler;
import org.opens.tanaguru.ruleimplementation.ElementHandler;
import static org.opens.tanaguru.rules.keystore.HtmlElementStore.BODY_ELEMENT;
import static org.opens.tanaguru.rules.keystore.HtmlElementStore.HTML_ELEMENT;

/**
 * Element selector implementation that searches for the "captcha" occurrence
 * on the page and determines whether this occurrence is handled by an element
 * implied by the test.
 * <p>
 * Candidate elements come either from a delegate {@link ElementSelector} or
 * from a handler of pre-selected elements, depending on the constructor used.
 * A candidate is kept when the keyword is found on the candidate itself, or on
 * one of its siblings or ancestors whose tag name differs from the candidate's.
 */
public class CaptchaElementSelector implements ElementSelector {

    /** The keyword searched, ignoring case, in the page and in the elements. */
    private static final String CAPTCHA_KEY = "captcha";

    /**
     * The delegate selector used to collect the candidate elements;
     * {@code null} when the instance was built from an image handler.
     */
    private final ElementSelector elementSelector;

    /**
     * The handler holding the pre-selected image elements;
     * {@code null} when the instance was built from a delegate selector.
     */
    private final ElementHandler imageHandler;

    /**
     * Builds a selector whose candidates are collected by a delegate selector.
     *
     * @param elementSelector the selector that collects the candidate elements
     */
    public CaptchaElementSelector(ElementSelector elementSelector) {
        this.elementSelector = elementSelector;
        this.imageHandler = null;
    }

    /**
     * Builds a selector whose candidates are the elements of a handler.
     *
     * @param imageHandler the handler holding the pre-selected elements
     */
    public CaptchaElementSelector(ElementHandler imageHandler) {
        this.elementSelector = null;
        this.imageHandler = imageHandler;
    }

    /**
     * Fills the selection handler with the candidate elements, then keeps only
     * the ones related to a captcha. Nothing is selected when the keyword does
     * not appear anywhere in the DOM string of the page.
     *
     * @param sspHandler the handler giving access to the page being tested
     * @param selectionHandler the handler that receives the selected elements
     */
    @Override
    public void selectElements(SSPHandler sspHandler, ElementHandler selectionHandler) {
        // Cheap pre-check on the whole page before looking at single elements.
        if (!StringUtils.containsIgnoreCase(sspHandler.getSSP().getDOM(), CAPTCHA_KEY)) {
            return;
        }
        if (elementSelector != null) {
            elementSelector.selectElements(sspHandler, selectionHandler);
        } else if (imageHandler != null) {
            selectionHandler.addAll(imageHandler.get());
        }
        extractCaptchaElements(selectionHandler);
    }

    /**
     * Parses all the elements held by the handler and keeps only the ones for
     * which the "captcha" keyword is found, either on the element itself (own
     * text or attribute values) or on one of its siblings or ancestors that
     * has a different tag name. The handler is emptied and refilled with the
     * retained elements, in the order in which they were first encountered.
     *
     * @param selectionHandler the handler to filter in place
     */
    public void extractCaptchaElements(ElementHandler<Element> selectionHandler) {
        if (selectionHandler.isEmpty()) {
            return;
        }
        // A LinkedHashSet removes duplicates while preserving the iteration
        // order of the handler, so the filtered selection has a stable order.
        Set<Element> captchaElements = new LinkedHashSet<Element>();
        for (Element el : selectionHandler.get()) {
            if (parseAttributeToExtractCaptcha(el) || hasCaptchaRelative(el)) {
                captchaElements.add(el);
            }
        }
        selectionHandler.clean();
        for (Element el : captchaElements) {
            selectionHandler.add(el);
        }
    }

    /**
     * @param el the candidate element
     * @return whether a sibling or an ancestor of the element, with a tag name
     * different from the element's one, contains the "captcha" keyword
     */
    private boolean hasCaptchaRelative(Element el) {
        for (Element relative : getSiblingsAndParents(el)) {
            if (!el.nodeName().equalsIgnoreCase(relative.nodeName())
                    && parseAttributeToExtractCaptcha(relative)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param el the element whose relatives are collected
     * @return all the siblings of the element followed by all its parents
     */
    private Elements getSiblingsAndParents(Element el) {
        Elements siblingsAndParents = new Elements();
        siblingsAndParents.addAll(el.siblingElements());
        siblingsAndParents.addAll(el.parents());
        return siblingsAndParents;
    }

    /**
     * Only the given element is inspected; walking through its siblings and
     * parents is the responsibility of the caller.
     *
     * @param element the element to inspect
     * @return whether the own text of the element or the value of one of its
     * attributes contains the "captcha" keyword, ignoring case; always
     * {@code false} for the html and body elements
     */
    private boolean parseAttributeToExtractCaptcha(Element element) {
        // The html and body elements wrap the whole page and are never
        // considered as carrying the captcha themselves.
        if (element.nodeName().equalsIgnoreCase(HTML_ELEMENT)
                || element.nodeName().equalsIgnoreCase(BODY_ELEMENT)) {
            return false;
        }
        if (StringUtils.containsIgnoreCase(element.ownText(), CAPTCHA_KEY)) {
            return true;
        }
        for (Attribute attr : element.attributes()) {
            if (StringUtils.containsIgnoreCase(attr.getValue(), CAPTCHA_KEY)) {
                return true;
            }
        }
        return false;
    }
}