List of usage examples for org.jsoup.nodes Element parents
public Elements parents()
From source file:org.abondar.experimental.eventsearch.EventFinder.java
public String getEventPlaces(String place) { String res = ""; try {//from www. j av a 2 s . c o m Document placeDoc = Jsoup.connect("https://afisha.yandex.ru" + place).get(); Elements elems = placeDoc.select("p"); for (Element e : elems) { if (e.parents().get(1).html().contains("<div style")) { if (e.children().size() > 1) { if (e.child(1).hasAttr("href")) { res = e.child(1).html() + " ?"; } } else if (e.children().isEmpty()) { res = e.html() + " ?"; } } } } catch (IOException ex) { Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex); } return res; }
From source file:com.astamuse.asta4d.render.RenderUtil.java
private final static boolean isBlockedByParentSnippet(Document doc, Element elem) { boolean isBlocked; String blockingId = elem.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_BLOCK); if (blockingId.isEmpty()) { // empty block id means there is no parent snippet that need to be // aware. if the original block is from a embed template, it means // that all of the parent snippets have been finished or this // element would not be imported now. isBlocked = false;//from w w w . j av a 2 s .c om } else { String parentSelector = SelectorUtil.attr(ExtNodeConstants.SNIPPET_NODE_TAG_SELECTOR, ExtNodeConstants.ATTR_SNIPPET_REF, blockingId); Elements parentSnippetSearch = elem.parents().select(parentSelector); if (parentSnippetSearch.isEmpty()) { isBlocked = false; } else { Element parentSnippet = parentSnippetSearch.first(); if (parentSnippet.attr(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS) .equals(ExtNodeConstants.SNIPPET_NODE_ATTR_STATUS_FINISHED)) { isBlocked = false; } else { isBlocked = true; } } } return isBlocked; }
From source file:com.serphacker.serposcope.scraper.google.scraper.GoogleScraper.java
protected boolean isSiteLinkElement(Element element) { if (element == null) { return false; }// www. j a v a 2 s . c o m Elements parents = element.parents(); if (parents == null || parents.isEmpty()) { return false; } for (Element parent : parents) { if (parent.hasClass("mslg") || parent.hasClass("nrg") || parent.hasClass("nrgw")) { return true; } } return false; }
From source file:org.apache.sling.hapi.client.forms.internal.FormValues.java
/** * @return/*w w w. ja v a2 s. c o m*/ * {@see http://www.w3.org/TR/html5/forms.html#constructing-the-form-data-set} */ private FormValues build() { for (Element input : form.select("button, input, select, textarea")) { String type = input.attr("type"); if (input.hasAttr("disabled")) continue; if (input.tagName().equalsIgnoreCase("button") && !type.equals("submit")) continue; if (input.tagName().equalsIgnoreCase("input") && (type.equals("button") || type.equals("reset"))) continue; if (type.equals("checkbox") && input.hasAttr("checked")) continue; if (type.equals("radio") && input.hasAttr("checked")) continue; if (!type.equals("image") && input.attr("name").length() == 0) continue; if (input.parents().is("datalist")) continue; if (type.equals("image") || type.equals("file")) continue; // don't support files for now String name = input.attr("name"); if (input.tagName().equalsIgnoreCase("select")) { for (Element o : input.select("option[selected]")) { if (o.hasAttr("disabled")) continue; list.add(name, new BasicNameValuePair(name, o.val())); } } else if (type.equals("checkbox") || type.equals("radio")) { String value = input.hasAttr("value") ? input.val() : "on"; list.add(name, new BasicNameValuePair(name, value)); } else { list.add(name, new BasicNameValuePair(name, input.val())); } } return this; }
From source file:ExtractorContentTest.java
/**
 * Turns one HTML table into a {@code Catalog}, appends it to {@code catalogs},
 * and then cleans up every catalog in that list.
 *
 * <p>Steps visible in the body:
 * <ol>
 *   <li>Structural context: {@code h2}, {@code h3} and {@code p} elements are
 *       selected starting from the table's ancestors and the text of the first
 *       match of each is kept. The {@code p} text is only used when it starts
 *       with {@code ";"}, with every {@code ";"} removed. The table's first
 *       {@code caption} text is kept as well.</li>
 *   <li>Headers: the table counts as sortable when its class attribute is
 *       exactly {@code "sortable wikitable"} or {@code "wikitable sortable"};
 *       for a sortable table with a {@code thead}, {@code rHeaders} is
 *       collected from the first {@code thead}.</li>
 *   <li>Rows: {@code treatRows} is run on the first {@code tbody} for sortable
 *       tables, otherwise on the table itself, and the resulting catalog is
 *       added to {@code catalogs}.</li>
 *   <li>Clean-up over all catalogs: each product's name is set to its value
 *       under its first header, and products whose every non-null value is
 *       contained in the corresponding header name are treated as header rows
 *       and removed.</li>
 * </ol>
 *
 * <p>NOTE(review): in this collapsed listing it is unclear whether
 * {@code List<Header> rHeaders = collectHeaders(table);} is live code or part
 * of the preceding line comment — confirm against the original file before
 * reformatting this method.
 *
 * @param table    the table element to process
 * @param catalogs list that receives the new catalog; all catalogs in it are cleaned up
 */
private void treatTable(Element table, List<Catalog> catalogs) { // 1. get section name Elements sect2 = table.parents().select("h2"); // section.getElementsByTag("h2") ; String s2 = null; if (!sect2.isEmpty()) s2 = sect2.first().text(); // FIXME what about more than 1 ? String s3 = null; Elements sect3 = table.parents().select("h3"); if (!sect3.isEmpty()) s3 = sect3.first().text(); String dt = null; Elements sectDT = table.parents().select("p"); if (!sectDT.isEmpty()) { String contentDT = sectDT.first().text(); if (contentDT.startsWith(";")) dt = contentDT.replaceAll(";", ""); } Elements caption = table.select("caption"); String captionName = null; if (!caption.isEmpty()) captionName = caption.first().text(); // FIXME other forms of structural information /*** * Headers */ // List<Header> rHeaders = collectHeaders(table); boolean sortable = !table.select("[class=sortable wikitable]").isEmpty() || !table.select("[class=wikitable sortable]").isEmpty(); // || !table.select("[class=sortable wikitable jquery-tablesorter]").isEmpty() ; // FIXME: other cases Elements heads = table.select("thead"); if (sortable && (!heads.isEmpty())) { rHeaders = collectHeaders(heads.first()); } System.err.println("SORTABLE:" + sortable + " rHeaders=" + rHeaders); // 2 treat row Catalog product = null; Tree<String> structuralInformation = mkStructuralInformation(s2, s3, dt, captionName); if (sortable) { product = treatRows(table.select("tbody").first(), structuralInformation, rHeaders, sortable); } else product = treatRows(table, structuralInformation, rHeaders, sortable); catalogs.add(product); // // set the "ID" / names // clean up for (Catalog catalog : catalogs) { List<Product> toRemove = new ArrayList<Product>(); for (Product p : catalog) { Header primaryHeader = p.getHeaders().get(0); p.setName(p.getValue(primaryHeader.getName())); // some products are headers (each value equals to header name) List<Header> headers = p.getHeaders(); boolean isHeader 
= true; for (Header header : headers) { String hName = header.getName(); String pValue = p.getValue(hName); if (pValue == null) continue; if (!hName.contains(pValue)) { isHeader = false; } } if (isHeader) { toRemove.add(p); } } if (!toRemove.isEmpty() && !catalog.isEmpty()) catalog.removeAll(toRemove); } }
From source file:org.asqatasun.rules.accessiweb22.Aw22Rule08031.java
/**
 * Checks whether at least one ancestor of the given element is among the
 * elements held by {@code elementWithLang} (the elements carrying a lang
 * attribute).
 *
 * @param el the element whose ancestors are inspected
 * @return whether the element passed as argument has a parent with a lang
 *         attribute
 */
private boolean isElementHasParentWithLang(Element el) {
    final boolean ancestorHasLang =
            CollectionUtils.containsAny(el.parents(), elementWithLang.get());
    return ancestorHasLang;
}
From source file:org.asqatasun.rules.elementselector.CaptchaElementSelector.java
/**
 * Gathers the sibling elements of the given element followed by all of its
 * ancestors into a single collection.
 *
 * @param el the element whose siblings and ancestors are collected
 * @return all the siblings and the parents of the element, siblings first
 */
private Elements getSiblingsAndParents(Element el) {
    Elements siblings = el.siblingElements();
    Elements ancestors = el.parents();

    Elements combined = new Elements();
    combined.addAll(siblings);
    combined.addAll(ancestors);
    return combined;
}
From source file:org.asqatasun.rules.elementselector.ImageElementSelector.java
/** * /*from www.ja va2 s . c o m*/ * @param elementHandler */ private void excludeLinksFromSelection(ElementHandler<Element> elementHandler) { if (!excludeCompositeLink && !excludeImageLink) { return; } ElementHandler<Element> linkHandler = new ElementHandlerImpl(); for (Element el : elementHandler.get()) { Element link = el.parents().select(A_ELEMENT).first(); if (excludeImageLink && isImageLink(link, el)) { linkHandler.add(el); } else if (excludeCompositeLink && isCompositeLink(link, el)) { linkHandler.add(el); } } elementHandler.removeAll(linkHandler); }
From source file:org.asqatasun.rules.elementselector.LinkElementSelector.java
/** * //w w w.j av a2 s . c o m * @param linkElement * @param linkText * @return whether the current link have a context */ protected boolean doesLinkHaveContext(Element linkElement, String linkText) { // does the current link have a title attribute? if (considerTitleAsContext && linkElement.hasAttr(TITLE_ATTR) && !StringUtils.equalsIgnoreCase(linkElement.attr(TITLE_ATTR), linkText)) { return true; } if (linkElement.hasAttr(ARIA_LABEL_ATTR) && StringUtils.isNotBlank(linkElement.attr(ARIA_LABEL_ATTR))) { return true; } if (linkElement.hasAttr(ARIA_LABELLEDBY_ATTR) && StringUtils.isNotBlank(linkElement.attr(ARIA_LABELLEDBY_ATTR))) { return true; } // does the parent of the current link have some text? if (StringUtils.isNotBlank(linkElement.parent().ownText())) { return true; } // does the current element have a previous sibling of heading type? if (isOneOfPrecedingSiblingofHeadingType(linkElement)) { return true; } // does one of the parent of the current element have a previous sibling // of heading type or is found in the PARENT_CONTEXT_ELEMENTS list? for (Element parent : linkElement.parents()) { if (PARENT_CONTEXT_ELEMENTS.contains(parent.tagName()) || isOneOfPrecedingSiblingofHeadingType(parent)) { return true; } } return false; }
From source file:org.opens.tanaguru.rules.elementchecker.helper.RuleCheckHelper.java
/**
 * Parses all the elements retrieved from the scope, extracts the ones where
 * the occurrence "captcha" is found among the attribute values of the element
 * itself or of one of its ancestors, and removes these elements from the
 * initial set of elements.
 *
 * <p>Each matching element is reported once. The earlier implementation only
 * broke out of the attribute loop, so an element was added again for every
 * matching ancestor (and once more when its own attributes matched too).
 *
 * @param elements the elements to scan; matching elements are removed from it
 * @return the elements identified as captcha-related, in encounter order
 */
public static Elements extractCaptchaElements(Elements elements) {
    Elements captchaElements = new Elements();
    for (Element el : elements) {
        if (isCaptchaRelated(el)) {
            captchaElements.add(el);
        }
    }
    elements.removeAll(captchaElements);
    return captchaElements;
}

/** Tells whether the element or any of its ancestors has a captcha-flavoured attribute value. */
private static boolean isCaptchaRelated(Element el) {
    if (hasCaptchaAttribute(el)) {
        return true;
    }
    for (Element ancestor : el.parents()) {
        if (hasCaptchaAttribute(ancestor)) {
            return true;
        }
    }
    return false;
}

/** Tells whether any attribute value of the element contains the captcha keyword, ignoring case. */
private static boolean hasCaptchaAttribute(Element el) {
    for (Attribute attr : el.attributes()) {
        if (StringUtils.containsIgnoreCase(attr.getValue(), CAPTCHA_KEYWORD)) {
            return true;
        }
    }
    return false;
}