Example usage for org.jsoup.nodes Element parents

List of usage examples for org.jsoup.nodes Element parents

Introduction

On this page you can find usage examples for the parents() method of org.jsoup.nodes.Element.

Prototype

public Elements parents() 

Source Link

Document

Get this element's parent and ancestors, up to the document root.

Usage

From source file:org.abondar.experimental.eventsearch.EventFinder.java

/**
 * Downloads the page at {@code "https://afisha.yandex.ru" + place} and scans its
 * {@code <p>} elements for a place description.
 *
 * <p>Only paragraphs whose second-nearest ancestor contains the text
 * {@code "<div style"} in its inner HTML are considered. Each matching paragraph
 * overwrites the previous result, so the value from the last match is returned.
 *
 * @param place path appended to the site base URL
 * @return the extracted HTML fragment followed by {@code " ?"}, or an empty
 *         string when nothing matched or the page could not be fetched
 */
public String getEventPlaces(String place) {
    String result = "";
    try {
        Document placeDoc = Jsoup.connect("https://afisha.yandex.ru" + place).get();

        for (Element paragraph : placeDoc.select("p")) {
            // Index 1 of parents() is the ancestor one level above the direct parent.
            Element secondAncestor = paragraph.parents().get(1);
            if (!secondAncestor.html().contains("<div style")) {
                continue;
            }

            int childCount = paragraph.children().size();
            if (childCount > 1) {
                // With several children, the second one is used only if it carries an href.
                Element secondChild = paragraph.child(1);
                if (secondChild.hasAttr("href")) {
                    result = secondChild.html() + " ?";
                }
            } else if (childCount == 0) {
                // A paragraph without child elements contributes its own content.
                result = paragraph.html() + " ?";
            }
        }
    } catch (IOException ex) {
        // Fetch failures are logged and the result collected so far is returned.
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }
    return result;
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Tells whether {@code elem} is blocked by an ancestor snippet that has not
 * finished yet.
 *
 * @param doc  the document being rendered (not read by this method)
 * @param elem the element whose blocking attribute is inspected
 * @return {@code true} only when the element names a blocking snippet, that
 *         snippet is found among the element's ancestors, and its status
 *         attribute is not the "finished" value
 */
private final static boolean isBlockedByParentSnippet(Document doc, Element elem) {
    String blockingId = elem.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_BLOCK);
    if (blockingId.isEmpty()) {
        // An empty block id means there is no parent snippet to be aware of.
        // If the original block came from an embedded template, all parent
        // snippets have already finished, or this element would not be
        // imported now.
        return false;
    }

    String ancestorSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR,
            ExtNodeConstants.ATTR_SNIPPET_REF, blockingId);
    Elements matchingAncestors = elem.parents().select(ancestorSelector);
    if (matchingAncestors.isEmpty()) {
        // The referenced snippet is not an ancestor of this element.
        return false;
    }

    String status = matchingAncestors.first().attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS);
    return !status.equals(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED);
}

From source file:com.serphacker.serposcope.scraper.google.scraper.GoogleScraper.java

/**
 * Checks whether the element sits inside a container carrying one of the CSS
 * classes {@code mslg}, {@code nrg} or {@code nrgw}.
 *
 * @param element the element to inspect, may be {@code null}
 * @return {@code true} if any ancestor has one of those classes; {@code false}
 *         otherwise, including for a {@code null} element or one without ancestors
 */
protected boolean isSiteLinkElement(Element element) {
    if (element == null) {
        return false;
    }

    Elements ancestors = element.parents();
    if (ancestors == null) {
        return false;
    }

    // An empty ancestor list simply skips the loop and falls through to false.
    for (Element ancestor : ancestors) {
        boolean hasSiteLinkClass = ancestor.hasClass("mslg")
                || ancestor.hasClass("nrg")
                || ancestor.hasClass("nrgw");
        if (hasSiteLinkClass) {
            return true;
        }
    }
    return false;
}

From source file:org.apache.sling.hapi.client.forms.internal.FormValues.java

/**
 * @return/*w  w  w. ja  v a2 s.  c o  m*/
 * {@see http://www.w3.org/TR/html5/forms.html#constructing-the-form-data-set}
 */
private FormValues build() {
    for (Element input : form.select("button, input, select, textarea")) {
        String type = input.attr("type");

        if (input.hasAttr("disabled"))
            continue;
        if (input.tagName().equalsIgnoreCase("button") && !type.equals("submit"))
            continue;
        if (input.tagName().equalsIgnoreCase("input") && (type.equals("button") || type.equals("reset")))
            continue;
        if (type.equals("checkbox") && input.hasAttr("checked"))
            continue;
        if (type.equals("radio") && input.hasAttr("checked"))
            continue;
        if (!type.equals("image") && input.attr("name").length() == 0)
            continue;
        if (input.parents().is("datalist"))
            continue;

        if (type.equals("image") || type.equals("file"))
            continue; // don't support files for now
        String name = input.attr("name");

        if (input.tagName().equalsIgnoreCase("select")) {
            for (Element o : input.select("option[selected]")) {
                if (o.hasAttr("disabled"))
                    continue;
                list.add(name, new BasicNameValuePair(name, o.val()));
            }
        } else if (type.equals("checkbox") || type.equals("radio")) {
            String value = input.hasAttr("value") ? input.val() : "on";
            list.add(name, new BasicNameValuePair(name, value));
        } else {
            list.add(name, new BasicNameValuePair(name, input.val()));
        }
    }
    return this;
}

From source file:ExtractorContentTest.java

/**
 * Turns one HTML table into a {@code Catalog}, appends it to {@code catalogs},
 * and then cleans up every catalog in the list.
 *
 * <p>Structural context (first {@code h2}, {@code h3} and {@code p} found via the
 * table's ancestors, plus the table caption) is gathered first, then headers
 * and rows are processed. Finally each product is named after the value of its
 * first header, and products whose values merely repeat the header names are
 * removed.
 *
 * @param table    the table element to process
 * @param catalogs the list receiving the new catalog; all its catalogs are cleaned
 */
private void treatTable(Element table, List<Catalog> catalogs) {
    // 1. structural information surrounding the table

    Elements h2Matches = table.parents().select("h2");
    // FIXME what about more than 1 ?
    String s2 = h2Matches.isEmpty() ? null : h2Matches.first().text();

    Elements h3Matches = table.parents().select("h3");
    String s3 = h3Matches.isEmpty() ? null : h3Matches.first().text();

    // The paragraph text is kept only when it starts with ';', with every ';' stripped.
    String dt = null;
    Elements paragraphMatches = table.parents().select("p");
    if (!paragraphMatches.isEmpty()) {
        String paragraphText = paragraphMatches.first().text();
        if (paragraphText.startsWith(";")) {
            dt = paragraphText.replaceAll(";", "");
        }
    }

    Elements captionMatches = table.select("caption");
    String captionName = captionMatches.isEmpty() ? null : captionMatches.first().text();

    // FIXME other forms of structural information

    // Headers: start from the whole table, then prefer <thead> for sortable tables.
    List<Header> rHeaders = collectHeaders(table);

    boolean sortable = !(table.select("[class=sortable wikitable]").isEmpty()
            && table.select("[class=wikitable sortable]").isEmpty());

    // FIXME: other cases
    Elements heads = table.select("thead");
    if (sortable && !heads.isEmpty()) {
        rHeaders = collectHeaders(heads.first());
    }

    System.err.println("SORTABLE:" + sortable + " rHeaders=" + rHeaders);

    // 2. rows: sortable tables are read from their first <tbody>
    Tree<String> structuralInformation = mkStructuralInformation(s2, s3, dt, captionName);
    Element rowContainer = sortable ? table.select("tbody").first() : table;
    Catalog product = treatRows(rowContainer, structuralInformation, rHeaders, sortable);
    catalogs.add(product);

    // 3. set the names and drop pseudo-products that are really header rows
    for (Catalog catalog : catalogs) {
        List<Product> headerLikeProducts = new ArrayList<Product>();
        for (Product p : catalog) {
            Header primaryHeader = p.getHeaders().get(0);
            p.setName(p.getValue(primaryHeader.getName()));

            // some products are headers (each value is contained in its header name)
            boolean isHeader = true;
            for (Header header : p.getHeaders()) {
                String headerName = header.getName();
                String value = p.getValue(headerName);
                if (value != null && !headerName.contains(value)) {
                    isHeader = false;
                }
            }
            if (isHeader) {
                headerLikeProducts.add(p);
            }
        }
        if (!(headerLikeProducts.isEmpty() || catalog.isEmpty())) {
            catalog.removeAll(headerLikeProducts);
        }
    }
}

From source file:org.asqatasun.rules.accessiweb22.Aw22Rule08031.java

/**
 * Checks whether at least one ancestor of the given element belongs to the
 * collection returned by {@code elementWithLang.get()} (presumably the
 * elements carrying a lang attribute).
 *
 * @param el the element whose ancestors are examined
 * @return whether the element passed as argument has an ancestor with a lang
 * attribute
 */
private boolean isElementHasParentWithLang(Element el) {
    final boolean hasAncestorWithLang = CollectionUtils.containsAny(el.parents(), elementWithLang.get());
    return hasAncestorWithLang;
}

From source file:org.asqatasun.rules.elementselector.CaptchaElementSelector.java

/**
 * Gathers the sibling elements and the ancestors of an element into a single
 * new collection, siblings first.
 *
 * @param el the element whose surroundings are collected
 * @return all the parents and the siblings of the element
 */
private Elements getSiblingsAndParents(Element el) {
    final Elements surroundings = new Elements();
    // Order matters to callers iterating the result: siblings, then ancestors.
    surroundings.addAll(el.siblingElements());
    surroundings.addAll(el.parents());
    return surroundings;
}

From source file:org.asqatasun.rules.elementselector.ImageElementSelector.java

/**
 * /*from www.ja va2 s . c  o m*/
 * @param elementHandler 
 */
private void excludeLinksFromSelection(ElementHandler<Element> elementHandler) {
    if (!excludeCompositeLink && !excludeImageLink) {
        return;
    }
    ElementHandler<Element> linkHandler = new ElementHandlerImpl();
    for (Element el : elementHandler.get()) {
        Element link = el.parents().select(A_ELEMENT).first();
        if (excludeImageLink && isImageLink(link, el)) {
            linkHandler.add(el);
        } else if (excludeCompositeLink && isCompositeLink(link, el)) {
            linkHandler.add(el);
        }
    }
    elementHandler.removeAll(linkHandler);
}

From source file:org.asqatasun.rules.elementselector.LinkElementSelector.java

/**
 * Determines whether a link has some context besides its own text.
 *
 * <p>The checks run in this order and stop at the first hit: a title attribute
 * differing from the link text (when titles count as context), a non-blank
 * aria-label, a non-blank aria-labelledby, own text on the direct parent, a
 * preceding heading sibling of the link, and finally any ancestor that is
 * either a known context element or itself preceded by a heading sibling.
 *
 * @param linkElement the link to examine
 * @param linkText the text of the link
 * @return whether the current link have a context
 */
protected boolean doesLinkHaveContext(Element linkElement, String linkText) {
    boolean hasDirectContext =
            // a title attribute that says something other than the link text
            (considerTitleAsContext && linkElement.hasAttr(TITLE_ATTR)
                    && !StringUtils.equalsIgnoreCase(linkElement.attr(TITLE_ATTR), linkText))
            // a non-blank ARIA label
            || (linkElement.hasAttr(ARIA_LABEL_ATTR)
                    && StringUtils.isNotBlank(linkElement.attr(ARIA_LABEL_ATTR)))
            // a non-blank ARIA labelledby reference
            || (linkElement.hasAttr(ARIA_LABELLEDBY_ATTR)
                    && StringUtils.isNotBlank(linkElement.attr(ARIA_LABELLEDBY_ATTR)))
            // some text directly owned by the parent of the link
            || StringUtils.isNotBlank(linkElement.parent().ownText())
            // a preceding sibling of heading type
            || isOneOfPrecedingSiblingofHeadingType(linkElement);
    if (hasDirectContext) {
        return true;
    }

    // Otherwise look for an ancestor that provides context, either by its tag
    // or through one of its own preceding heading siblings.
    for (Element ancestor : linkElement.parents()) {
        String ancestorTag = ancestor.tagName();
        if (PARENT_CONTEXT_ELEMENTS.contains(ancestorTag)
                || isOneOfPrecedingSiblingofHeadingType(ancestor)) {
            return true;
        }
    }
    return false;
}

From source file:org.opens.tanaguru.rules.elementchecker.helper.RuleCheckHelper.java

/**
 * Extracts from the given set the elements related to a captcha and removes
 * them from that set.
 *
 * <p>An element is extracted when the captcha keyword occurs (ignoring case)
 * in one of its own attribute values or in an attribute value of one of its
 * ancestors. Each extracted element appears exactly once in the result, even
 * when both the element and several ancestors match.
 *
 * @param elements the elements to filter; matching elements are removed from it
 * @return the extracted captcha elements, in their original iteration order
 */
public static Elements extractCaptchaElements(Elements elements) {
    Elements captchaElements = new Elements();
    for (Element el : elements) {
        boolean captchaRelated = hasCaptchaAttributeValue(el);
        if (!captchaRelated) {
            // Stop at the first matching ancestor so the element is added only once.
            for (Element ancestor : el.parents()) {
                if (hasCaptchaAttributeValue(ancestor)) {
                    captchaRelated = true;
                    break;
                }
            }
        }
        if (captchaRelated) {
            captchaElements.add(el);
        }
    }
    elements.removeAll(captchaElements);
    return captchaElements;
}

/** Tells whether any attribute value of the element contains the captcha keyword, ignoring case. */
private static boolean hasCaptchaAttributeValue(Element element) {
    for (Attribute attr : element.attributes()) {
        if (StringUtils.containsIgnoreCase(attr.getValue(), CAPTCHA_KEYWORD)) {
            return true;
        }
    }
    return false;
}