Example usage for org.jsoup.nodes Element children

List of usage examples for org.jsoup.nodes Element children

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.children().

Prototype

public Elements children() 

Source Link

Document

Get this element's child elements.

Usage

From source file:gov.medicaid.screening.dao.impl.OptometryLicenseDAOBean.java

/**
 * Performs a search for all possible results, following the "Next >>" pager link
 * until the response no longer offers one.
 *
 * @param identifier The value to be searched.
 * @return the search result for licenses
 * @throws URISyntaxException When an error occurs while building the URL.
 * @throws ClientProtocolException When client does not support protocol used.
 * @throws IOException When an error occurs while parsing response.
 * @throws ParseException When an error occurs while parsing response.
 * @throws PersistenceException for database related errors
 * @throws ServiceException for any other problems encountered
 */
private SearchResult<License> getAllResults(String identifier) throws URISyntaxException,
        ClientProtocolException, IOException, ParseException, PersistenceException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        URIBuilder builder = new URIBuilder(getSearchURL()).setPath("/Default.aspx");
        // captured before the tabid parameter is added, so it holds the bare page URL
        String hostId = builder.build().toString();
        builder.setParameter("tabid", "799");

        // load the landing page first; its __VIEWSTATE value must be echoed back in the search post
        HttpGet httpget = new HttpGet(builder.build());
        HttpResponse landing = client.execute(httpget);
        Document document = Jsoup.parse(EntityUtils.toString(landing.getEntity()));

        HttpPost httppost = new HttpPost(builder.build());
        HttpEntity entity = postForm(hostId, client, httppost,
                new String[][] { { "_ctl0:_ctl1:_ctl0:txtCriteria", identifier },
                        { "_ctl0:_ctl1:_ctl0:btnSubmit", "Search" }, { "__EVENTTARGET", "" },
                        { "__EVENTARGUMENT", "" },
                        { "__VIEWSTATE", document.select("#Form input[name=__VIEWSTATE]").first().val() } },
                true);

        // licenses list
        List<License> licenseList = new ArrayList<License>();
        while (entity != null) {
            document = Jsoup.parse(EntityUtils.toString(entity));

            // data rows are the non-header rows of the grid that have exactly 8 child cells
            for (Element row : document.select("table.Datagrid tr")) {
                Elements tds = row.children();
                if (!"DatagridHeaderStyle".equals(row.attr("class").trim()) && tds.size() == 8) {
                    licenseList.add(parseLicense(tds));
                }
            }

            // done, check if there are additional results
            entity = null;
            for (Element anchor : document.getElementsByTag("a")) {
                if (anchor.text().equals("Next >>")) {
                    entity = postForm(hostId, client, httppost,
                            new String[][] { { "_ctl0:_ctl1:_ctl0:txtCriteria", identifier },
                                    { "__EVENTTARGET", "_ctl0:_ctl1:_ctl0:dgrdLicensee:_ctl29:_ctl1" },
                                    { "__EVENTARGUMENT", "" },
                                    { "__VIEWSTATE",
                                            document.select("#Form input[name=__VIEWSTATE]").first().val() } },
                            true);
                    break;
                }
            }
        }

        SearchResult<License> result = new SearchResult<License>();
        result.setItems(licenseList);
        return result;
    } finally {
        // the client is created locally, so its connection pool must be released here
        client.getConnectionManager().shutdown();
    }
}

From source file:gov.medicaid.screening.dao.impl.SocialWorkLicenseDAOBean.java

/**
 * Retrieves all results from the source site.
 *
 * @param searchCriteria the search criteria.
 * @return the providers matched/*from  www  . j  ava2 s  . c  om*/
 * @throws URISyntaxException if the URL could not be correctly constructed
 * @throws IOException for any I/O related errors
 * @throws ServiceException for any other errors encountered
 */
private SearchResult<License> getAllResults(SocialWorkCriteria searchCriteria)
        throws URISyntaxException, IOException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient(getLaxSSLConnectionManager());
    client.setRedirectStrategy(new LaxRedirectStrategy());

    HttpGet getSearchPage = new HttpGet(new URIBuilder(getSearchURL()).build());
    HttpResponse response = client.execute(getSearchPage);
    verifyAndAuditCall(getSearchURL(), response);

    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    String licenseNo = "";
    if (searchCriteria instanceof SocialWorkLicenseSearchByLicenseNumberCriteria) {
        licenseNo = "" + ((SocialWorkLicenseSearchByLicenseNumberCriteria) searchCriteria).getLicenseNumber();
    }
    String level = "none";
    if (searchCriteria.getLevel() != null) {
        level = Util.defaultString(searchCriteria.getLevel().getName());
    }

    HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build());
    HttpEntity entity = postForm(getSearchURL(), client, search,
            buildParams(searchCriteria, page, licenseNo, level, null), true);

    page = Jsoup.parse(EntityUtils.toString(entity));

    List<License> allLicenses = new ArrayList<License>();
    // check if detail page (single match)
    if (page.select("#lblFormTitle").text().equals("License Details")) {
        allLicenses.add(parseLicenseDetail(page));
    } else {

        Elements rows = page.select(RESULT_ROWS_SELECTOR);
        while (rows.size() > 0) {
            for (Element row : rows) {
                License license = parseLicense(row.children());
                if (license != null) {
                    allLicenses.add(license);
                }
            }
            rows.clear();

            // check for next page
            Element currentPage = page.select("#_ctl7_grdSearchResults tr.TablePager span").first();
            getLog().log(Level.DEBUG, "Current page is: " + currentPage.text());
            Element pageLink = currentPage.nextElementSibling();
            if (pageLink != null && pageLink.hasAttr("href")) {
                getLog().log(Level.DEBUG, "There are more results, getting the next page.");

                String target = parseEventTarget(pageLink.attr("href"));
                entity = postForm(getSearchURL(), client, search,
                        buildParams(searchCriteria, page, licenseNo, level, target), true);
                page = Jsoup.parse(EntityUtils.toString(entity));
                rows = page.select(RESULT_ROWS_SELECTOR);
            }
        }
    }

    SearchResult<License> results = new SearchResult<License>();
    results.setItems(allLicenses);
    return results;
}

From source file:com.bdx.rainbow.service.etl.analyze.SYJHttpAnalyze.java

/**
 * Extracts the HTML content of the second {@code td} matched within a table row.
 *
 * @param eleTr the table row to read; it must contain at least two {@code td} elements
 * @return the inner HTML of the cell's first child element when the cell has child
 *         elements, otherwise the cell's own inner HTML with surrounding whitespace trimmed
 * @throws Exception declared by the signature for callers to handle
 */
private String parseDetailTr(Element eleTr) throws Exception {
    final Element secondCell = eleTr.select("td").get(1);
    final boolean hasChildElements = !secondCell.children().isEmpty();

    // a wrapped value is returned as-is; only bare cell content is trimmed
    return hasChildElements ? secondCell.child(0).html() : secondCell.html().trim();
}

From source file:com.blackducksoftware.tools.nrt.generator.NRTReportGenerator.java

/**
 * Copies the HTML template into the finalHtmlOutput then injects the
 * generates JSON data into the specific div location and writes it out.
 * /*from w w w  .j av a 2  s  .com*/
 * @param expectedFile
 */
public void generateHTMLFromTemplate(File finalHtmlOutput) {

    log.info("Writing to report: " + finalHtmlOutput);
    String jsonComponentList = generateJSONFromObject(componentMap);
    String jsonPropertyList = generateJSONFromObject(nrtConfig.getOptionsForExport());
    // Construct a variable out of it
    jsonComponentList = "var compList=[" + jsonComponentList + "]";
    jsonPropertyList = "var propList=[" + jsonPropertyList + "]";

    PrintWriter writer = null;
    try {
        // Read the template
        Document doc = Jsoup.parse(finalHtmlOutput, "UTF-8");

        // Inject the JSON
        Elements jsonElementDivBlock = doc.getElementsByClass(NRTConstants.HTML_JSON_DATA_BLOCK);

        // This will be empty, but it should exist
        Element jsonDivElement = jsonElementDivBlock.get(0);

        if (jsonDivElement != null) {
            // Remove any script tags from it, in case the user populated
            // the template incorrectly with data
            if (jsonDivElement.children().size() > 0) {
                Elements children = jsonDivElement.children();
                for (int i = 0; i < children.size(); i++) {
                    Element el = children.get(i);
                    el.remove();
                }
            }

            addNewScriptElementWithJson(jsonDivElement, jsonComponentList);
            addNewScriptElementWithJson(jsonDivElement, jsonPropertyList);
        } else {
            log.error("Unable to find a valid critical DIV inside HTML template: "
                    + NRTConstants.HTML_JSON_DATA_BLOCK);
        }
        writer = new PrintWriter(finalHtmlOutput, "UTF-8");
        // Write out the file
        writer.write(doc.html());
        writer.flush();
        writer.close();

    } catch (Exception e) {
        log.error("Unable to write out final report file!", e);
    } finally {
        writer.close();
    }

}

From source file:com.kingfong.webcrawler.util.DOMContentUtils.java

/**
 * Collects link targets from the supplied HTML into {@code outlinks}.
 *
 * <p>Every element whose lower-cased tag name has an entry in {@code linkParams} is
 * inspected, and the value of the attribute named by that entry's {@code attrName}
 * is taken as the link target. A target starting with "/" is prefixed with the
 * protocol and host of {@code url}. A target is added only when
 * {@code URLFilter.filt} accepts it.
 *
 * <p>{@code rel="nofollow"} and {@code method="post"} do not influence the result.
 *
 * @param html the HTML text to parse
 * @param url the URL whose protocol and host are used to complete targets starting with "/"
 * @param outlinks the set that accepted targets are added to
 */
public void getOutlinks(String html, URL url, HashSet<String> outlinks) {

    Document document = Jsoup.parse(html);
    for (Element currentNode : document.getAllElements()) {
        LinkParams params = linkParams.get(currentNode.tagName().toLowerCase());
        if (params == null) {
            // this tag is not configured as a link carrier
            continue;
        }

        // when the attribute occurs more than once, the last occurrence wins
        String target = null;
        for (Attribute attr : currentNode.attributes()) {
            if (params.attrName.equalsIgnoreCase(attr.getKey())) {
                target = attr.getValue();
            }
        }

        // NOTE(review): the port of `url` is not carried over, and "//host/path" style
        // targets also start with "/" — confirm whether either case matters to callers
        if (StringUtils.startsWith(target, "/")) {
            target = url.getProtocol() + "://" + url.getHost() + target;
        }
        if (target != null && URLFilter.filt(target)) {
            outlinks.add(target);
        }
    }
}

From source file:de.geeksfactory.opacclient.apis.Littera.java

/**
 * Fetches one page of search results and converts each list entry into a
 * {@link SearchResult}.
 *
 * <p>Entries carrying the class {@code zugangsmonat} are skipped. When the page
 * contains no result list, an empty result is returned.
 *
 * @param query the search query parts passed to {@code buildSearchUrl}
 * @param pageIndex the result page to request; also stored on every result
 * @return the parsed results together with the total count read from the navigation
 *         element, or a total of 0 when that element is absent
 * @throws IOException declared by the signature for I/O failures
 * @throws OpacErrorException declared by the signature
 * @throws JSONException declared by the signature
 */
protected SearchRequestResult executeSearch(List<SearchQuery> query, int pageIndex)
        throws IOException, OpacErrorException, JSONException {
    if (!initialised) {
        start();
    }
    final String searchUrl;
    try {
        searchUrl = buildSearchUrl(query, pageIndex);
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }
    final String html = httpGet(searchUrl, getDefaultEncoding());
    final Document doc = Jsoup.parse(html);

    final Element navigation = doc.select(".result_view .navigation").first();
    final int totalResults = navigation != null ? parseTotalResults(navigation.text()) : 0;

    final List<SearchResult> results = new ArrayList<>();
    final Element ul = doc.select(".result_view ul.list").first();
    if (ul == null) {
        // first() returns null when nothing matches; without a result list there is nothing to parse
        return new SearchRequestResult(results, totalResults, pageIndex);
    }

    for (final Element li : ul.children()) {
        if (li.hasClass("zugangsmonat")) {
            continue;
        }
        final SearchResult result = new SearchResult();
        final Element title = li.select(".titelinfo a").first();
        result.setId(getQueryParamsFirst(title.attr("href")).get("id"));
        result.setInnerhtml(title.text() + "<br>" + title.parent().nextElementSibling().text());
        // position within this page, counted from 0
        result.setNr(results.size());
        result.setPage(pageIndex);
        result.setType(MEDIA_TYPES.get(li.select(".statusinfo .ma").text()));
        result.setCover(getCover(li));
        // the status is derived from the status image file name; unknown names leave it null
        final String statusImg = li.select(".status img").attr("src");
        result.setStatus(statusImg.contains("-yes") ? SearchResult.Status.GREEN
                : statusImg.contains("-no") ? SearchResult.Status.RED : null);
        results.add(result);
    }
    return new SearchRequestResult(results, totalResults, pageIndex);
}

From source file:mobi.jenkinsci.ci.client.JenkinsClient.java

/**
 * Builds a map from commit id to issue out of the rows of a changes table.
 *
 * <p>The first child element of the table is treated as its body. Rows for which
 * no issue is found are left out, and rows whose issue URL is malformed are logged
 * and skipped.
 *
 * @param changesTable the table element, may be {@code null}
 * @return the issues keyed by commit id; empty when the table is {@code null} or has
 *         no child elements
 */
private HashMap<String, Issue> getIssuesFromTable(final Element changesTable) {
    final HashMap<String, Issue> issuesByCommit = new HashMap<String, ChangeSetItem.Issue>();

    if (changesTable != null) {
        if (changesTable.children().isEmpty()) {
            LOG.warn("Cannot find changes TBODY");
        } else {
            for (final Element tableRow : changesTable.child(0).children()) {
                final String commitId = getCommitIdFromRow(tableRow);
                try {
                    final Issue rowIssue = getIssueFromRow(tableRow);
                    if (rowIssue != null) {
                        issuesByCommit.put(commitId, rowIssue);
                    }
                } catch (final MalformedURLException e) {
                    LOG.warn("Invalid issue URL for row " + tableRow.toString() + ": skipping", e);
                }
            }
        }
    }

    return issuesByCommit;
}

From source file:org.apache.sling.hapi.client.impl.microdata.MicrodataDocument.java

/**
 * Walks the element tree depth-first and collects an {@link ItemImpl} for every
 * element that has an {@code itemscope} attribute but no {@code itemprop} attribute.
 *
 * <p>The descent stops at each collected element, so its descendants are not visited.
 *
 * @param e the element to start from
 * @param items the list that found items are appended to
 * @return the same {@code items} list that was passed in
 */
private List<Item> selectItems(Element e, List<Item> items) {
    final boolean startsItem = e.hasAttr("itemscope") && !e.hasAttr("itemprop");

    if (startsItem) {
        items.add(new ItemImpl(e, this));
    } else {
        for (Element child : e.children()) {
            selectItems(child, items);
        }
    }

    return items;
}

From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.DBpediaOntologyOld.java

/**
 * Recursively fills a category tree from nested HTML lists.
 *
 * <p>Each {@code a} child with a non-empty {@code href} supplies the label and URI of
 * {@code category}, registers it in {@code map} and prints the pair to standard
 * output. Each {@code li} inside a {@code ul} child becomes a new sub-category that
 * is traversed in turn and then linked to {@code category} as its parent.
 *
 * @param e the element whose child elements are inspected
 * @param category the category described by {@code e}
 * @param map receives every labelled category, keyed by its label
 */
private void traverseHierarchy(Element e, DBpediaCategory category, HashMap<String, DBpediaCategory> map) {
    for (Element child : e.children()) {
        final String childTag = child.tag().getName();

        if ("a".equals(childTag)) {
            final String link = child.attr("href");
            if (link == null || link.length() == 0) {
                continue;
            }
            final String label = child.text();
            final String uri = CLASSES_BASE_URI + label;
            category.setLabel(label);
            category.setUri(uri);
            map.put(category.getLabel(), category);
            System.out.println(label + "\t" + uri);
        } else if ("ul".equals(childTag)) {
            for (Element listEntry : child.children()) {
                if (!listEntry.tagName().equals("li")) {
                    continue;
                }
                // the subtree is traversed before the parent link is attached
                DBpediaCategory subCategory = new DBpediaCategory();
                traverseHierarchy(listEntry, subCategory, map);
                subCategory.parents = new HashSet<>();
                subCategory.parents.add(category);
                category.getSubClasses().add(subCategory);
            }
        }
    }
}

From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java

/**
 * Logs in, reads the list of weeks from the navigation bar and builds the
 * substitution plan from the plan page(s) of every listed week.
 *
 * <p>Depending on the {@code single_classes} option, either one page per class
 * (numbered from 1, zero-padded to five digits) or the single page {@code 00000}
 * is fetched for each week.
 *
 * @return the substitution plan holding the collected days
 * @throws IOException declared by the signature for I/O failures
 * @throws JSONException declared by the signature for configuration access failures
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    // non-breaking space entities are stripped before parsing
    Document navbarDoc = Jsoup.parse(getNavbarDoc().replace("&nbsp;", ""));
    Element select = navbarDoc.select("select[name=week]").first();

    Vertretungsplan v = new Vertretungsplan();
    List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>();

    String info = navbarDoc.select(".description").text();
    String stand;
    try {
        // keeps everything from "Stand:" onwards; when the marker is missing,
        // substring(-1) throws and the catch below falls back to an empty string
        stand = info.substring(info.indexOf("Stand:"));
    } catch (Exception e) {
        stand = "";
    }

    // one option element per selectable week
    for (Element option : select.children()) {
        String week = option.attr("value");
        String letter = data.optString("letter", "w");
        if (data.optBoolean("single_classes", false)) {
            // one page per class, addressed by a running number starting at 1
            int classNumber = 1;
            for (String klasse : getAllClasses()) {
                String paddedNumber = String.format("%05d", classNumber);
                String url;
                // the option decides whether the week or the letter directory comes first
                if (data.optBoolean("w_after_number", false))
                    url = baseUrl + "/" + week + "/" + letter + "/" + letter + paddedNumber + ".htm";
                else
                    url = baseUrl + "/" + letter + "/" + week + "/" + letter + paddedNumber + ".htm";

                Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
                // each bold element directly under #vertretung, or under one of its paragraphs, is a day heading
                Elements days = doc.select("#vertretung > p > b, #vertretung > b");
                for (Element day : days) {
                    VertretungsplanTag tag = getTagByDatum(tage, day.text());
                    tag.setStand(stand);
                    tag.setDatum(day.text());
                    // locate the element that follows the day heading
                    Element next = null;
                    if (day.parent().tagName().equals("p")) {
                        next = day.parent().nextElementSibling().nextElementSibling();
                    } else
                        next = day.parent().select("p").first().nextElementSibling();
                    if (next.className().equals("subst")) {
                        // substitution table
                        // this continue also skips storing the tag for this day
                        if (next.text().contains("Vertretungen sind nicht freigegeben"))
                            continue;
                        parseVertretungsplanTable(next, data, tag);
                    } else {
                        // messages come first; the table is two siblings further on
                        parseNachrichten(next, data, tag);
                        next = next.nextElementSibling().nextElementSibling();
                        parseVertretungsplanTable(next, data, tag);
                    }
                    // NOTE(review): this branch stores via writeTagByDatum while the
                    // branch below uses tage.add — confirm the difference is intentional
                    writeTagByDatum(tage, tag);
                }

                classNumber++;
            }
        } else {
            // a single page with the fixed number 00000
            String url;
            if (data.optBoolean("w_after_number", false))
                url = baseUrl + "/" + week + "/" + letter + "/" + letter + "00000.htm";
            else
                url = baseUrl + "/" + letter + "/" + week + "/" + letter + "00000.htm";
            Document doc = Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
            Elements days = doc.select("#vertretung > p > b, #vertretung > b");
            for (Element day : days) {
                VertretungsplanTag tag = getTagByDatum(tage, day.text());
                tag.setStand(stand);
                tag.setDatum(day.text());
                // locate the element that follows the day heading
                Element next = null;
                if (day.parent().tagName().equals("p")) {
                    next = day.parent().nextElementSibling().nextElementSibling();
                } else
                    next = day.parent().select("p").first().nextElementSibling();
                if (next.className().equals("subst")) {
                    // substitution table
                    // this continue also skips adding the tag for this day
                    if (next.text().contains("Vertretungen sind nicht freigegeben"))
                        continue;
                    parseVertretungsplanTable(next, data, tag);
                } else {
                    // messages come first; the table is two siblings further on
                    parseNachrichten(next, data, tag);
                    next = next.nextElementSibling().nextElementSibling();
                    parseVertretungsplanTable(next, data, tag);
                }
                tage.add(tag);
            }
        }
        // executed once per week, so the plan is only populated when at least one option exists
        v.setTage(tage);
    }
    return v;
}