Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes Element attr

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element.attr method.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.DBpediaOntologyOld.java

/**
 * Recursively walks the children of {@code e} and fills in {@code category}.
 *
 * <p>An {@code a} child with a non-empty {@code href} supplies the label and URI of
 * {@code category} (URI = {@code CLASSES_BASE_URI} + link text) and registers the category
 * in {@code map} under its label. Each {@code li} inside a {@code ul} child becomes a new
 * sub-category that is populated recursively and then linked to {@code category} as its
 * single parent.
 *
 * @param e        the element whose direct children are inspected
 * @param category the category populated from {@code e}
 * @param map      label-to-category index that receives every labelled category
 */
private void traverseHierarchy(Element e, DBpediaCategory category, HashMap<String, DBpediaCategory> map) {
    for (Element child : e.children()) {
        String tag = child.tagName();
        if ("a".equals(tag)) {
            String href = child.attr("href");
            if (href == null || href.isEmpty()) {
                // Anchors without a target carry no class information.
                continue;
            }
            String label = child.text();
            String uri = CLASSES_BASE_URI + label;
            category.setLabel(label);
            category.setUri(uri);
            map.put(category.getLabel(), category);
            System.out.println(label + "\t" + uri);
        } else if ("ul".equals(tag)) {
            for (Element item : child.children()) {
                if (!"li".equals(item.tagName())) {
                    continue;
                }
                DBpediaCategory subCategory = new DBpediaCategory();
                traverseHierarchy(item, subCategory, map);
                // The parent set is (re)created only after the subtree has been populated.
                subCategory.parents = new HashSet<>();
                subCategory.parents.add(category);
                category.getSubClasses().add(subCategory);
            }
        }
    }
}

From source file:qhindex.controller.SearchAuthorWorksController.java

/**
 * Builds an {@link AuthorWork} from one row of an author's works table.
 *
 * <p>The title and work URL come from the first {@code td.gsc_a_t > a} link. When the row
 * has at least two {@code td.gsc_a_t > div} elements, the first is used as the authors and
 * the second as the publisher text. The citation URL and count come from the first
 * {@code td.gsc_a_c > a} link, if any; an unparsable count is reported and stored as 0.
 *
 * @param authorWorkElements the row element to read from
 * @return the populated work
 * @throws IOException declared by the signature; presumably raised by
 *         {@code handlePublicationMedium} - confirm against that method
 * @throws IndexOutOfBoundsException if the row contains no {@code td.gsc_a_t > a} link
 */
private AuthorWork extractAuthorWorkData(Element authorWorkElements) throws IOException {
    AuthorWork work = new AuthorWork();

    Element titleLink = authorWorkElements.select("td.gsc_a_t > a").get(0);
    work.setTitle(titleLink.text());
    String workUrl = titleLink.attr("href");

    Elements details = authorWorkElements.select("td.gsc_a_t > div");
    if (details.size() > 1) {
        String publisherInGoogle = details.get(1).text();
        work.setPublisherInGoogle(publisherInGoogle);
        work.setPublisher(handlePublicationMedium(publisherInGoogle, workUrl));
        work.setAuthors(details.get(0).text());
    }

    Elements citationLinks = authorWorkElements.select("td.gsc_a_c > a");
    if (!citationLinks.isEmpty()) {
        Element citationLink = citationLinks.get(0);
        work.setCitationsUrl(citationLink.attr("href"));

        int citationCount = 0;
        try {
            String citationText = citationLink.text();
            if (!citationText.isEmpty()) {
                citationCount = Integer.parseInt(citationText);
            }
        } catch (Exception ex) {
            // Best effort: keep the default of 0 and record the problem for the caller.
            Debug.print("Exception while extracting author work data: " + ex.toString());
            resultsMsg += "Exception while extracting author work data.\n";
        }
        work.setCitations(citationCount);
    }
    return work;
}

From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java

/**
 * ?xpath/*ww w .  ja v  a 2 s .  c om*/
 *
 * @param xpath
 * @param root
 * @return
 */
public List<JXNode> evaluate(String xpath, Elements root) throws NoSuchAxisException, NoSuchFunctionException {
    List<JXNode> res = new LinkedList<JXNode>();
    Elements context = root;
    List<Node> xpathNodes = getXpathNodeTree(xpath);
    for (int i = 0; i < xpathNodes.size(); i++) {
        Node n = xpathNodes.get(i);
        LinkedList<Element> contextTmp = new LinkedList<Element>();
        if (n.getScopeEm() == ScopeEm.RECURSIVE || n.getScopeEm() == ScopeEm.CURREC) {
            if (n.getTagName().startsWith("@")) {
                for (Element e : context) {
                    //?
                    String key = n.getTagName().substring(1);
                    if (key.equals("*")) {
                        res.add(JXNode.t(e.attributes().toString()));
                    } else {
                        String value = e.attr(key);
                        if (StringUtils.isNotBlank(value)) {
                            res.add(JXNode.t(value));
                        }
                    }
                    //??
                    for (Element dep : e.getAllElements()) {
                        if (key.equals("*")) {
                            res.add(JXNode.t(dep.attributes().toString()));
                        } else {
                            String value = dep.attr(key);
                            if (StringUtils.isNotBlank(value)) {
                                res.add(JXNode.t(value));
                            }
                        }
                    }
                }
            } else if (n.getTagName().endsWith("()")) {
                //??text()
                res.add(JXNode.t(context.text()));
            } else {
                Elements searchRes = context.select(n.getTagName());
                for (Element e : searchRes) {
                    Element filterR = filter(e, n);
                    if (filterR != null) {
                        contextTmp.add(filterR);
                    }
                }
                context = new Elements(contextTmp);
                if (i == xpathNodes.size() - 1) {
                    for (Element e : contextTmp) {
                        res.add(JXNode.e(e));
                    }
                }
            }

        } else {
            if (n.getTagName().startsWith("@")) {
                for (Element e : context) {
                    String key = n.getTagName().substring(1);
                    if (key.equals("*")) {
                        res.add(JXNode.t(e.attributes().toString()));
                    } else {
                        String value = e.attr(key);
                        if (StringUtils.isNotBlank(value)) {
                            res.add(JXNode.t(value));
                        }
                    }
                }
            } else if (n.getTagName().endsWith("()")) {
                res = (List<JXNode>) callFunc(n.getTagName().substring(0, n.getTagName().length() - 2),
                        context);
            } else {
                for (Element e : context) {
                    Elements filterScope = e.children();
                    if (StringUtils.isNotBlank(n.getAxis())) {
                        filterScope = getAxisScopeEls(n.getAxis(), e);
                    }
                    for (Element chi : filterScope) {
                        Element fchi = filter(chi, n);
                        if (fchi != null) {
                            contextTmp.add(fchi);
                        }
                    }
                }
                context = new Elements(contextTmp);
                if (i == xpathNodes.size() - 1) {
                    for (Element e : contextTmp) {
                        res.add(JXNode.e(e));
                    }
                }
            }
        }
    }
    return res;
}

From source file:gov.medicaid.screening.dao.impl.SocialWorkLicenseDAOBean.java

/**
 * Retrieves all results from the source site.
 *
 * @param searchCriteria the search criteria.
 * @return the providers matched/*from   w w  w . j  a  v a 2 s.com*/
 * @throws URISyntaxException if the URL could not be correctly constructed
 * @throws IOException for any I/O related errors
 * @throws ServiceException for any other errors encountered
 */
private SearchResult<License> getAllResults(SocialWorkCriteria searchCriteria)
        throws URISyntaxException, IOException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager());
    client.setRedirectStrategy(new LaxRedirectStrategy());

    HttpGet getSearchPage = new HttpGet(new URIBuilder(getSearchURL()).build());
    HttpResponse response = client.execute(getSearchPage);
    verifyAndAuditCall(getSearchURL(), response);

    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    String licenseNo = "";
    if (searchCriteria instanceof SocialWorkLicenseSearchByLicenseNumberCriteria) {
        licenseNo = "" + ((SocialWorkLicenseSearchByLicenseNumberCriteria) searchCriteria).getLicenseNumber();
    }
    String level = "none";
    if (searchCriteria.getLevel() != null) {
        level = Util.defaultString(searchCriteria.getLevel().getName());
    }

    HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build());
    HttpEntity entity = postForm(getSearchURL(), client, search,
            buildParams(searchCriteria, page, licenseNo, level, null), true);

    page = Jsoup.parse(EntityUtils.toString(entity));

    List<License> allLicenses = new ArrayList<License>();
    // check if detail page (single match)
    if (page.select("#lblFormTitle").text().equals("License Details")) {
        allLicenses.add(parseLicenseDetail(page));
    } else {

        Elements rows = page.select(RESULT_ROWS_SELECTOR);
        while (rows.size() > 0) {
            for (Element row : rows) {
                License license = parseLicense(row.children());
                if (license != null) {
                    allLicenses.add(license);
                }
            }
            rows.clear();

            // check for next page
            Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first();
            getLog().log(Level.DEBUG, "Current page is: " + currentPage.text());
            Element pageLink = currentPage.nextElementSibling();
            if (pageLink != null && pageLink.hasAttr("href")) {
                getLog().log(Level.DEBUG, "There are more results, getting the next page.");

                String target = parseEventTarget(pageLink.attr("href"));
                entity = postForm(getSearchURL(), client, search,
                        buildParams(searchCriteria, page, licenseNo, level, target), true);
                page = Jsoup.parse(EntityUtils.toString(entity));
                rows = page.select(RESULT_ROWS_SELECTOR);
            }
        }
    }

    SearchResult<License> results = new SearchResult<License>();
    results.setItems(allLicenses);
    return results;
}

From source file:com.gumtreescraper.scraper.GumtreeScraper.java

/**
 * Collects ad URLs from a paged listing, starting at {@code url}.
 *
 * <p>For every {@code #srchrslt-adtable > li} entry accepted by {@code isOwner}, a
 * {@link Gumtree} holding {@code BASE_URL} + the ad link is appended to {@code gumtrees}.
 * Paging follows the {@code a.rs-paginator-btn.next} link and stops when there is no such
 * link, when {@code needToScrapeNextPage} rejects the date text of the last ad on a page,
 * or when a page fails to load or parse.
 *
 * @param gumtrees the list that receives one entry per scraped ad
 * @param url      the URL of the first listing page
 * @throws IOException declared by the signature; failures while fetching a page are
 *                     printed and end the scrape rather than being propagated
 */
public void scrapeWithJSoup(List<Gumtree> gumtrees, String url) throws IOException {
    String nextPageUrl = url;
    boolean needContinue = true;
    do {
        try {
            Document doc = Jsoup.connect(nextPageUrl).timeout(getTimeout() * 1000).userAgent("Mozilla")
                    .get();
            Elements adElements = doc.select("#srchrslt-adtable > li");
            int size = adElements.size();
            for (int i = 0; i < size; i++) {
                Element ad = adElements.get(i);

                if (!isOwner(ad)) {
                    continue;
                }

                Element linkElement = ad.select("h6.rs-ad-title > a").first();

                if (linkElement == null) {
                    // Dump the unexpected markup so the selector can be adjusted.
                    System.out.print(ad);
                    continue;
                }

                String adUrl = linkElement.attr("href");
                Gumtree gumtree = new Gumtree();
                gumtree.setUrl(BASE_URL + adUrl);
                gumtrees.add(gumtree);

                if (i == size - 1) { // last element
                    // Only the last ad on the page decides whether paging continues.
                    Elements adDateElements = ad.select("div.rs-ad-date");
                    if (!adDateElements.isEmpty()
                            && !needToScrapeNextPage(adDateElements.first().text().trim())) {
                        needContinue = false;
                    }
                }
            }

            Elements nextElements = doc.select("a.rs-paginator-btn.next");
            if (nextElements.isEmpty()) {
                break;
            }

            nextPageUrl = BASE_URL + nextElements.first().attr("href");
            System.out.println("next page: " + nextPageUrl);
        } catch (Exception oex) {
            System.out.println(oex);
            // nextPageUrl may not have advanced; looping again would re-request the same
            // failing page indefinitely, so give up on paging here.
            break;
        }
    } while (needContinue);
}

From source file:net.GoTicketing.GoTicketing.java

/**
 * Locates the captcha image in the ticketing page and stores its URL.
 *
 * <p>Parses {@code TicketingPageHTML}, looks up the element with id {@code idRandomPic}
 * and assigns {@code host} + its {@code src} attribute to {@code ImageCaptchaSrc}.
 *
 * @throws Exception if the page contains no element with id {@code idRandomPic}
 */
private void praseImageCaptchaSrc() throws Exception {
    Element captchaImage = Jsoup.parse(TicketingPageHTML).getElementById("idRandomPic");
    if (captchaImage == null) {
        throw new Exception("Can't get image captcha source !");
    }

    ImageCaptchaSrc = host + captchaImage.attr("src");
}

From source file:se.vgregion.portal.iframe.controller.CSViewController.java

/**
 * Adds the {@code loginForm:j_idcl} entry to the dynamic field values.
 *
 * <p>The value is the {@code id} attribute of the button returned by
 * {@code findButtonWithIdWhichStartsWith} for the prefix {@code loginForm:j_idt} in the
 * dynamic fields document.
 *
 * @param dynamicFieldValueMap the map that receives the extra field
 * @param portletConfig        the portlet configuration used to obtain the document
 * @throws Exception declared by the signature; presumably raised when the dynamic fields
 *                   document cannot be obtained - confirm against getDynamicFieldsDocument
 */
private void addSpecialFieldForRaindance(Map<String, String> dynamicFieldValueMap, PortletConfig portletConfig)
        throws Exception {
    Document dynamicFields = getDynamicFieldsDocument(portletConfig);
    String buttonId = findButtonWithIdWhichStartsWith(dynamicFields, "loginForm:j_idt").attr("id");
    dynamicFieldValueMap.put("loginForm:j_idcl", buttonId);
}

From source file:mml.handler.post.MMLPostHTMLHandler.java

/**
 * Parses a paragraph-like element ("p" or "hN", often carrying a class) into the text
 * buffer and records a range for it.
 *
 * <p>The range is named after the element's class, or {@code defaultName} when the class
 * is empty. Child spans go to {@code parseSpan}, other child elements to
 * {@code parseOtherElement}, and text nodes are appended verbatim.
 *
 * @param p the paragraph/heading element from the document fragment
 * @param defaultName the default name for the property
 * @throws JSONException declared by the signature; presumably raised while parsing child
 *                       elements - confirm against parseSpan/parseOtherElement
 */
private void parsePara(Element p, String defaultName) throws JSONException {
    List<Node> childNodes = p.childNodes();
    String cssClass = p.attr("class");
    String name = (cssClass == null || cssClass.length() == 0) ? defaultName : cssClass;

    // Separator before the paragraph.
    if (isLineFormat(name) || prevWasMilestone) {
        ensure(1, false);
    } else {
        ensure(2, true);
    }

    int start = sb.length();
    Range range = new Range(name, start, 0);
    stil.add(range);

    for (Node node : childNodes) {
        if (node instanceof Element) {
            Element childElement = (Element) node;
            if ("span".equals(node.nodeName().toLowerCase())) {
                parseSpan(childElement);
            } else {
                parseOtherElement(childElement);
            }
        } else if (node instanceof TextNode) {
            sb.append(((TextNode) node).getWholeText());
        }
    }

    // Separator after the paragraph.
    ensure(isLineFormat(name) ? 1 : 2, true);

    // The range was registered with length 0; fix it up now that the text is in place.
    this.stil.updateLen(range, sb.length() - start);
    prevWasMilestone = false;
}

From source file:mml.handler.post.MMLPostHTMLHandler.java

/**
 * Parse a codeblock/* w  w  w  . j  a  v  a 2  s  . c  o m*/
 * @param elem the element to parse
 * @throws a JSON exception
 */
private void parsePre(Element elem) throws JSONException {
    if (elem.hasText()) {
        int offset = sb.length();
        String name = elem.attr("class");
        if (name == null || name.length() == 0)
            name = "pre";
        Range r = new Range(name, offset, 0);
        stil.add(r);
        if (elem.hasAttr("class")) {
            List<Node> children = elem.childNodes();
            for (Node child : children) {
                if (child instanceof Element) {
                    if (child.nodeName().equals("span"))
                        parseSpan((Element) child);
                    else
                        parseOtherElement((Element) child);
                } else if (child instanceof TextNode)
                    sb.append(((TextNode) child).getWholeText());
            }
        } else
            sb.append(elem.text());
        this.stil.updateLen(r, sb.length() - offset);
    }
    prevWasMilestone = false;
    ensure(1, false);
}

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Copies the cover image URL from the document into the film.
 *
 * <p>Uses the {@code src} of the first {@code .fm-intro img[src]} element; the film is left
 * untouched when no such image exists or its {@code src} is blank.
 *
 * @param doc  the document to search
 * @param film the film that receives the cover URL
 */
private void setFilmCover(Document doc, Film film) {
    Elements covers = doc.select(".fm-intro img[src]");
    if (!CollectionUtils.isNotEmpty(covers)) {
        return;
    }

    String coverUrl = covers.get(0).attr("src");
    if (!StringUtils.isNotBlank(coverUrl)) {
        return;
    }
    film.setCoverUrl(coverUrl);
}