Example usage for org.jsoup.nodes Element children

List of usage examples for org.jsoup.nodes Element children

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element children() method.

Prototype

public Elements children() 

Source Link

Document

Get this element's child elements.

Usage

From source file:gov.medicaid.screening.dao.impl.PodiatricMedicineLicenseDAOBean.java

/**
 * Retrieves all results from the source site.
 *
 * <p>The exchange is three requests on one client: a GET of the search URL, a POST of the
 * agency login form, and a POST of the search form. Each POST sends back the
 * {@code __VIEWSTATE} value read from the page returned by the previous request. Every row
 * matching {@code table#_ctl2_dgrdResults tr.DataGrid} on the final page is passed to
 * {@code parseLicense}; rows for which it returns {@code null} are skipped.
 *
 * @param criteria the search criteria.
 * @return the providers matched
 * @throws URISyntaxException if the URL could not be correctly constructed
 * @throws IOException for any I/O related errors
 * @throws ServiceException for any other errors encountered
 */
private SearchResult<License> getAllResults(String criteria)
        throws URISyntaxException, IOException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        client.setRedirectStrategy(new LaxRedirectStrategy());

        HttpGet getFrontPage = new HttpGet(new URIBuilder(getSearchURL()).build());
        HttpResponse response = client.execute(getFrontPage);

        verifyAndAuditCall(getSearchURL(), response);

        Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

        // Step 2: log in with the agency code, echoing the front page's view state.
        HttpPost getSearchPage = new HttpPost(new URIBuilder(getSearchURL()).build());
        HttpEntity entity = postForm(getSearchURL(), client, getSearchPage,
                new String[][] { { "_ctl2:dropAgencyCode", "H7Q" }, { "_ctl2:btnLogin", "Login" },
                        { "__VIEWSTATE", page.select("#__aspnetForm input[name=__VIEWSTATE]").first().val() } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));

        // Step 3: submit the actual search, echoing the view state of the login response.
        HttpPost search = new HttpPost(new URIBuilder(getSearchURL()).build());
        entity = postForm(getSearchURL(), client, search,
                new String[][] { { "_ctl2:txtCriteria", criteria }, { "_ctl2:btnSearch", "Search" },
                        { "__VIEWSTATE", page.select("#__aspnetForm input[name=__VIEWSTATE]").first().val() } },
                true);

        page = Jsoup.parse(EntityUtils.toString(entity));

        List<License> allLicenses = new ArrayList<License>();
        Elements rows = page.select("table#_ctl2_dgrdResults tr.DataGrid");
        for (Element row : rows) {
            License license = parseLicense(row.children());
            if (license != null) {
                allLicenses.add(license);
            }
        }
        SearchResult<License> results = new SearchResult<License>();
        results.setItems(allLicenses);
        return results;
    } finally {
        // The client is local to this call; shut its connection manager down so the
        // underlying connections are released on success and on every failure path.
        client.getConnectionManager().shutdown();
    }
}

From source file:feedzilla.Feed.java

/**
 * Fills this object's fields from the direct children of the given entry element.
 *
 * <p>Children are dispatched on their node name. Once a {@code source} child has been seen,
 * any later {@code title} or {@code link} child is stored in the source-specific field
 * instead of the entry's own. Unrecognised node names are logged at debug level.
 *
 * @param entry the element whose children are inspected
 */
private void parser(Element entry) {
    boolean sourceSeen = false;
    for (Element child : entry.children()) {
        final String tag = child.nodeName();
        if (tag.equals("id")) {
            // The numeric id is the part after the first ':' in the element text.
            this.id = Integer.parseInt(child.text().split(":")[1]);
        } else if (tag.equals("title")) {
            if (sourceSeen) {
                this.source_title = child.text();
            } else {
                this.title = child.text();
            }
        } else if (tag.equals("summary")) {
            // Keep only the text preceding the first "<br".
            this.summary = child.text().split("<br")[0];
        } else if (tag.equals("published")) {
            this.published = child.text();
        } else if (tag.equals("updated")) {
            this.updated = child.text();
        } else if (tag.equals("author")) {
            this.author = child.text();
        } else if (tag.equals("link")) {
            if (sourceSeen) {
                this.source_link = child.attr("href");
            } else {
                this.link = child.attr("href");
            }
        } else if (tag.equals("rights")) {
            this.copyright = child.text();
        } else if (tag.equals("source")) {
            sourceSeen = true;
        } else {
            Log.debug("Unknow TAG: " + tag);
        }
    }
}

From source file:org.abondar.experimental.eventsearch.EventFinder.java

/**
 * Downloads {@code "https://afisha.yandex.ru" + place} and derives a place string from the
 * page's {@code <p>} elements.
 *
 * <p>Only paragraphs whose second-closest ancestor has inner HTML containing
 * {@code "<div style"} are considered. For such a paragraph with more than one child, the
 * HTML of its second child is taken when that child has an {@code href} attribute; for a
 * paragraph with no children, its own HTML is taken. Later matches overwrite earlier ones.
 *
 * @param place the path appended to the site root
 * @return the last extracted value followed by {@code " ?"}, or an empty string when nothing
 *         matched or the page could not be fetched (the I/O error is logged)
 */
public String getEventPlaces(String place) {
    String result = "";
    try {
        Document placeDoc = Jsoup.connect("https://afisha.yandex.ru" + place).get();

        for (Element paragraph : placeDoc.select("p")) {
            if (!paragraph.parents().get(1).html().contains("<div style")) {
                continue;
            }

            Elements kids = paragraph.children();
            if (kids.size() > 1) {
                Element second = paragraph.child(1);
                if (second.hasAttr("href")) {
                    result = second.html() + " ?";
                }
            } else if (kids.isEmpty()) {
                result = paragraph.html() + " ?";
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }
    return result;
}

From source file:gov.medicaid.screening.dao.impl.MedicaidCertifiedProvidersDAOBean.java

/**
 * Retrieves all results from the source site.
 *
 * <p>A GET of the search URL is issued first on the same client, then the criteria are posted
 * to {@code showprovideroutput.cfm}. Every row matching {@code div#body table tbody tr:gt(0)}
 * in the response is passed to {@code parseProfile}; rows for which it returns {@code null}
 * are skipped.
 *
 * @param criteria the search criteria.
 * @return the providers matched
 * @throws URISyntaxException if the URL could not be correctly constructed
 * @throws IOException for any I/O related errors
 * @throws ServiceException for any other errors encountered
 */
private SearchResult<ProviderProfile> getAllResults(MedicaidCertifiedProviderSearchCriteria criteria)
        throws URISyntaxException, IOException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        client.setRedirectStrategy(new LaxRedirectStrategy());

        // we need to get a token from the start page, this will be stored in the client
        HttpGet getFrontPage = new HttpGet(new URIBuilder(getSearchURL()).build());
        HttpResponse response = client.execute(getFrontPage);
        verifyAndAuditCall(getSearchURL(), response);
        EntityUtils.consume(response.getEntity()); // releases the connection

        // our client is now valid, pass the criteria to the search page
        String postSearchURL = Util.replaceLastURLPart(getSearchURL(), "showprovideroutput.cfm");
        HttpPost searchPage = new HttpPost(new URIBuilder(postSearchURL).build());

        // When searching "All", the value-bearing fields are sent blank ("0" for the county).
        boolean searchAll = "All".equals(criteria.getCriteria());
        HttpEntity entity = postForm(postSearchURL, client, searchPage,
                new String[][] { { "ProviderCatagory", criteria.getType() },
                        { "WhichArea", criteria.getCriteria() }, { "Submit", "Submit" },
                        { "SelectCounty", searchAll ? "0" : criteria.getValue() },
                        { "CityToFind", searchAll ? "" : criteria.getValue() },
                        { "ProviderToFind", searchAll ? "" : criteria.getValue() } },
                true);

        // this now holds the search results, parse every row
        Document page = Jsoup.parse(EntityUtils.toString(entity));
        List<ProviderProfile> allProviders = new ArrayList<ProviderProfile>();
        Elements rows = page.select("div#body table tbody tr:gt(0)");
        for (Element row : rows) {
            ProviderProfile profile = parseProfile(row.children());
            if (profile != null) {
                allProviders.add(profile);
            }
        }

        SearchResult<ProviderProfile> results = new SearchResult<ProviderProfile>();
        results.setItems(allProviders);
        return results;
    } finally {
        // The client is local to this call; shut its connection manager down so the
        // underlying connections are released on success and on every failure path.
        client.getConnectionManager().shutdown();
    }
}

From source file:MySpaceParser.java

/**
 * Parses one saved HTML page and records the song it describes in {@code index}, keyed by
 * song id.
 *
 * <p>Two page shapes are handled:
 * <ul>
 *   <li>Video pages: a {@code <meta property="og:video">} whose {@code content} URL supplies
 *       the song id (last path segment), title (second to last) and artist (fourth to last).</li>
 *   <li>Song pages: a {@code <section id="song">} containing a child with
 *       {@code id="actions"}; the second child of that element carries the {@code data-*}
 *       attributes that describe the song.</li>
 * </ul>
 * Fields that are not found keep the placeholder {@code "*^*"}; {@code year} is never
 * populated by this method.
 *
 * @param file the HTML file to parse, read as ISO-8859-1
 * @throws Exception if the file cannot be read or parsed
 */
private void parseSingleFile(File file) throws Exception {
    // A read/parse failure is allowed to propagate: carrying on with a null document could
    // only end in a NullPointerException on the very next use.
    Document htmlFile = Jsoup.parse(file, "ISO-8859-1");

    String title = "*^*";
    String artist = "*^*";
    String url = "*^*";
    String imageurl = "*^*";
    String pageTitle = "*^*";
    String description = "*^*";
    String songid = "*^*";
    String genre = "*^*";
    String album = "*^*";
    String year = "*^*";
    boolean isVideo = false;

    for (Element meta : htmlFile.getElementsByTag("meta")) {
        String name = meta.attr("name");
        String prop = meta.attr("property");
        if (prop.equals("og:video")) {
            // Blank console line retained from the original output.
            System.out.println();
            url = meta.attr("content");
            String[] arr = url.split("/");
            songid = arr[arr.length - 1];
            title = arr[arr.length - 2];
            artist = arr[arr.length - 4];
            isVideo = true;
        }
        if (name.equals("description")) {
            description = meta.attr("content");
        }
    }

    // Only the first <title> element is used.
    Elements titles = htmlFile.getElementsByTag("title");
    if (!titles.isEmpty()) {
        pageTitle = titles.first().html();
    }

    if (isVideo) {
        SongData s = new SongData(title, url, album, artist, year, genre, imageurl);
        s.setPagetitle(pageTitle);
        s.setDescrption(description);
        index.put(songid, s);
        return;
    }

    // Not a video page: look for <section id="song"> -> child id="actions" -> second child.
    for (Element section : htmlFile.getElementsByTag("section")) {
        if (!section.attr("id").equals("song")) {
            continue;
        }
        for (Element sectionChild : section.children()) {
            if (!sectionChild.attr("id").equals("actions")) {
                continue;
            }
            Elements actions = sectionChild.children();
            if (actions.size() < 2) {
                continue;
            }
            Element songNode = actions.get(1);
            songid = songNode.attr("data-song-id");
            album = songNode.attr("data-album-title");
            title = songNode.attr("data-title");
            artist = songNode.attr("data-artist-name");
            url = "www.myspace.com" + songNode.attr("data-url");
            genre = songNode.attr("data-genre-name");
            imageurl = songNode.attr("data-image-url");
            SongData s = new SongData(title, url, album, artist, year, genre, imageurl);
            s.setPagetitle(pageTitle);
            s.setDescrption(description);
            index.put(songid, s);
        }
    }
}

From source file:gov.medicaid.screening.dao.impl.HealthOccupationsProgramCredentialDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * <p>After an initial GET of the search URL, the criteria are posted to
 * {@code credential_search.do}. Every row matching {@code table.formTable tr:gt(0)} in the
 * response is converted with {@code parseProfile} and added to the result as-is.
 *
 * @param criteria The search criteria.
 * @return the search result for licenses
 *
 * @throws URISyntaxException if an error occurs while building the URL.
 * @throws ClientProtocolException if client does not support protocol used.
 * @throws IOException if an error occurs while parsing response.
 * @throws ParseException if an error occurs while parsing response.
 * @throws ServiceException for any other problems encountered
 */
private SearchResult<ProviderProfile> getAllResults(HealthOccupationsProgramCredentialSearchCriteria criteria)
        throws URISyntaxException, ClientProtocolException, IOException, ParseException, ServiceException {
    DefaultHttpClient httpClient = new DefaultHttpClient(getLaxSSLConnectionManager());
    httpClient.setRedirectStrategy(new LaxRedirectStrategy());

    // Initial GET; its body is not needed, so it is drained straight away.
    HttpResponse landing = httpClient.execute(new HttpGet(new URIBuilder(getSearchURL()).build()));
    verifyAndAuditCall(getSearchURL(), landing);
    EntityUtils.consume(landing.getEntity());

    String fullSearchURL = Util.replaceLastURLPart(getSearchURL(), "credential_search.do");
    HttpPost searchRequest = new HttpPost(new URIBuilder(fullSearchURL).build());

    String[][] formFields = {
            { "city", Util.defaultString(criteria.getCity()) },
            { "credentialNumber", Util.defaultLongString(criteria.getCredentialNumber()) },
            { "firstName", Util.defaultString(criteria.getFirstName()) },
            { "lastName", Util.defaultString(criteria.getLastName()) },
            { "county", getDefaultValue(criteria.getCounty()) },
            { "credentialStatus", getDefaultValue(criteria.getStatus()) },
            { "credentialType", getDefaultValue(criteria.getOccupationType()) },
            { "discipline", getDefaultValue(criteria.getDiscipline()) },
            { "state", getDefaultValue(criteria.getState()) },
            { "p_action", "search" } };
    HttpEntity resultEntity = postForm(fullSearchURL, httpClient, searchRequest, formFields, false);

    Document resultPage = Jsoup.parse(EntityUtils.toString(resultEntity));

    List<ProviderProfile> profiles = new ArrayList<ProviderProfile>();
    for (Element row : resultPage.select("table.formTable tr:gt(0)")) {
        profiles.add(parseProfile(row.children()));
    }

    SearchResult<ProviderProfile> found = new SearchResult<ProviderProfile>();
    found.setItems(profiles);
    return found;
}

From source file:gov.medicaid.screening.dao.impl.DieteticsAndNutritionPracticeLicenseDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * <p>The landing page is fetched to obtain its {@code __VIEWSTATE}, the search form is
 * posted, and then result pages are read one after another: as long as the current page
 * contains a link whose text is {@code "Next >>"}, the paging postback is sent with that
 * page's {@code __VIEWSTATE}. Every row matching {@code GRID_ROW_SELECTOR} on each page is
 * converted with {@code parseLicense}.
 *
 * @param identifier The value to be searched.
 * @return the search result for licenses
 * @throws URISyntaxException When an error occurs while building the URL.
 * @throws ClientProtocolException When client does not support protocol used.
 * @throws IOException When an error occurs while parsing response.
 * @throws ParseException When an error occurs while parsing response.
 * @throws PersistenceException for database related errors
 * @throws ServiceException for any other errors
 */
private SearchResult<License> getAllResults(String identifier) throws URISyntaxException,
        ClientProtocolException, IOException, ParseException, PersistenceException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        URIBuilder builder = new URIBuilder(getSearchURL());
        String hostId = builder.build().toString();

        HttpGet httpget = new HttpGet(builder.build());
        HttpResponse landing = client.execute(httpget);
        Document document = Jsoup.parse(EntityUtils.toString(landing.getEntity()));

        HttpPost httppost = new HttpPost(builder.build());
        HttpEntity entity = postForm(hostId, client, httppost,
                new String[][] { { "_ctl0:_ctl1:_ctl0:txtCriteria", identifier },
                        { "_ctl0:_ctl1:_ctl0:btnSubmit", "Search" }, { "__EVENTTARGET", "" },
                        { "__EVENTARGUMENT", "" },
                        { "__VIEWSTATE", document.select("#Form input[name=__VIEWSTATE]").first().val() } },
                true);

        // licenses list
        List<License> licenseList = new ArrayList<License>();
        while (entity != null) {
            document = Jsoup.parse(EntityUtils.toString(entity));

            // select() yields an empty collection, never null, when nothing matches.
            for (Element row : document.select(GRID_ROW_SELECTOR)) {
                licenseList.add(parseLicense(row.children()));
            }

            // done, check if there are additional results
            entity = null;
            for (Element anchor : document.getElementsByTag("a")) {
                if (anchor.text().equals("Next >>")) {
                    entity = postForm(hostId, client, httppost,
                            new String[][] { { "_ctl0:_ctl1:_ctl0:txtCriteria", identifier },
                                    { "__EVENTTARGET", "_ctl0:_ctl1:_ctl0:dgrdLicensee:_ctl29:_ctl1" },
                                    { "__EVENTARGUMENT", "" },
                                    { "__VIEWSTATE",
                                            document.select("#Form input[name=__VIEWSTATE]").first().val() } },
                            true);
                    break;
                }
            }
        }

        SearchResult<License> result = new SearchResult<License>();
        result.setItems(licenseList);
        return result;
    } finally {
        // The client is local to this call; shut its connection manager down so the
        // underlying connections are released on success and on every failure path.
        client.getConnectionManager().shutdown();
    }
}

From source file:web.analyzer.utils.Utils.java

/**
 * Collects the heading elements of a document in the order {@code doc.select("*")} yields
 * them.
 *
 * <p>Each heading is stored with its tag name, its inner HTML and the current value of a
 * running counter. The counter is incremented for every element visited and reset to zero
 * after an element that has no child elements.
 *
 * <p>NOTE(review): the counter is therefore the number of elements visited since the last
 * childless element, which is not the same as the element's depth in the tree — confirm
 * which of the two callers expect.
 *
 * @param doc the parsed document to scan
 * @return the headings found, in traversal order
 */
public List<Heading> docHeadingsProcess(Document doc) {
    List<Heading> headings = new ArrayList<Heading>();
    int level = 0;
    for (Element current : doc.select("*")) {
        level++;
        String tag = current.tagName();
        if (HEADING_TAG.contains(tag)) {
            headings.add(new Heading(tag, current.html(), level));
        }

        // A childless element restarts the counter for whatever element comes next.
        if (current.children().size() == 0) {
            level = 0;
        }
    }

    return headings;
}

From source file:gov.medicaid.screening.dao.impl.MarriageAndFamilyTherapyLicenseDAOBean.java

/**
 * Performs a search for all possible results.
 *
 * <p>Requests {@code /search.asp} on the given host for the requested page. When the response
 * status is 200, every {@code tr[bgcolor]} row is converted with {@code parseLicenseInfo};
 * if the page contains a link whose own text contains {@code "Next"}, the following page is
 * fetched recursively and its items are appended. For any other status the returned result
 * has no items set. The call is audited after the page (and any following pages) has been
 * processed.
 *
 * @param criteria The search criteria.
 * @param identifier The value to be searched.
 * @param host The host where to perform search.
 * @param pageNumber The page number requested
 * @return the search result for licenses
 * @throws URISyntaxException When an error occurs while building the URL.
 * @throws IOException When an error occurs while parsing response.
 * @throws ParseException When an error occurs while parsing response.
 * @throws PersistenceException if any db related error is encountered
 * @throws ServiceException When an error occurs while trying to perform search.
 */
private SearchResult<License> getAllResults(String criteria, String identifier, String host, int pageNumber)
        throws URISyntaxException, ParseException, PersistenceException, IOException, ServiceException {
    HttpClient client = new DefaultHttpClient();
    try {
        URIBuilder builder = new URIBuilder(host).setPath("/search.asp");
        // Captured before the query parameters are added, so it identifies the endpoint only.
        String hostId = builder.build().toString();

        builder.setParameter("qry", criteria).setParameter("crit", identifier).setParameter("p", "s")
                .setParameter("rsp", pageNumber + "");

        URI uri = builder.build();
        HttpGet httpget = new HttpGet(uri);

        SearchResult<License> searchResults = new SearchResult<License>();

        HttpResponse response = client.execute(httpget);
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode == HttpStatus.SC_OK) {

            HttpEntity entity = response.getEntity();
            // licenses list
            List<License> licenseList = new ArrayList<License>();
            if (entity != null) {
                String result = EntityUtils.toString(entity);
                Document document = Jsoup.parse(result);
                Elements trs = document.select("tr[bgcolor]");
                for (Element tr : trs) {
                    Elements tds = tr.children();
                    licenseList.add(parseLicenseInfo(tds));
                }
                // check if there is next page
                Element next = document.select("a:containsOwn(Next)").first();
                if (next != null) {
                    SearchResult<License> nextResults = getAllResults(criteria, identifier, host,
                            pageNumber + 1);
                    if (nextResults != null) {
                        licenseList.addAll(nextResults.getItems());
                    }
                }
            }

            searchResults.setItems(licenseList);
        }
        verifyAndAuditCall(hostId, response);

        return searchResults;
    } finally {
        // One client is created per page (this method recurses); shut each one down so its
        // connection is released, including when the status was not 200 and the body was
        // never read, or when an exception is thrown.
        client.getConnectionManager().shutdown();
    }
}

From source file:by.heap.remark.convert.TextCleaner.java

/**
 * Walks the element's subtree and swaps every {@code <br>} element for a newline.
 *
 * @param el the element whose descendants are rewritten in place
 */
private void fixLineBreaks(Element el) {
    for (final Element child : el.children()) {
        if (!child.tagName().equals("br")) {
            // Not a line break: descend into it.
            fixLineBreaks(child);
            continue;
        }
        // Put the newline in front of the <br>, then drop the <br> itself.
        child.before("\n");
        child.remove();
    }
}